1 /* Subroutines used for code generation on the DEC Alpha.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
5 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
26 #include "coretypes.h"
31 #include "hard-reg-set.h"
33 #include "insn-config.h"
34 #include "conditions.h"
36 #include "insn-attr.h"
47 #include "integrate.h"
50 #include "target-def.h"
52 #include "langhooks.h"
53 #include <splay-tree.h>
54 #include "cfglayout.h"
56 #include "tree-flow.h"
57 #include "tree-stdarg.h"
58 #include "tm-constrs.h"
62 /* Specify which cpu to schedule for. */
63 enum processor_type alpha_tune;
65 /* Which cpu we're generating code for. */
66 enum processor_type alpha_cpu;
68 static const char * const alpha_cpu_name[] =
73 /* Specify how accurate floating-point traps need to be. */
75 enum alpha_trap_precision alpha_tp;
77 /* Specify the floating-point rounding mode. */
79 enum alpha_fp_rounding_mode alpha_fprm;
81 /* Specify which things cause traps. */
83 enum alpha_fp_trap_mode alpha_fptm;
85 /* Nonzero if inside a function, because the Alpha assembler can't
86 handle .file directives inside functions. */
88 static int inside_function = FALSE;
90 /* The number of cycles of latency we should assume on memory reads. */
92 int alpha_memory_latency = 3;
94 /* Whether the function needs the GP. */
96 static int alpha_function_needs_gp;
98 /* The alias set for prologue/epilogue register save/restore. */
100 static GTY(()) alias_set_type alpha_sr_alias_set;
102 /* The assembler name of the current function. */
104 static const char *alpha_fnname;
106 /* The next explicit relocation sequence number. */
107 extern GTY(()) int alpha_next_sequence_number;
108 int alpha_next_sequence_number = 1;
110 /* The literal and gpdisp sequence numbers for this insn, as printed
111 by %# and %* respectively. */
112 extern GTY(()) int alpha_this_literal_sequence_number;
113 extern GTY(()) int alpha_this_gpdisp_sequence_number;
114 int alpha_this_literal_sequence_number;
115 int alpha_this_gpdisp_sequence_number;
117 /* Costs of various operations on the different architectures. */
119 struct alpha_rtx_cost_data
121 unsigned char fp_add;
122 unsigned char fp_mult;
123 unsigned char fp_div_sf;
124 unsigned char fp_div_df;
125 unsigned char int_mult_si;
126 unsigned char int_mult_di;
127 unsigned char int_shift;
128 unsigned char int_cmov;
129 unsigned short int_div;
132 static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
135 COSTS_N_INSNS (6), /* fp_add */
136 COSTS_N_INSNS (6), /* fp_mult */
137 COSTS_N_INSNS (34), /* fp_div_sf */
138 COSTS_N_INSNS (63), /* fp_div_df */
139 COSTS_N_INSNS (23), /* int_mult_si */
140 COSTS_N_INSNS (23), /* int_mult_di */
141 COSTS_N_INSNS (2), /* int_shift */
142 COSTS_N_INSNS (2), /* int_cmov */
143 COSTS_N_INSNS (97), /* int_div */
146 COSTS_N_INSNS (4), /* fp_add */
147 COSTS_N_INSNS (4), /* fp_mult */
148 COSTS_N_INSNS (15), /* fp_div_sf */
149 COSTS_N_INSNS (22), /* fp_div_df */
150 COSTS_N_INSNS (8), /* int_mult_si */
151 COSTS_N_INSNS (12), /* int_mult_di */
152 COSTS_N_INSNS (1) + 1, /* int_shift */
153 COSTS_N_INSNS (1), /* int_cmov */
154 COSTS_N_INSNS (83), /* int_div */
157 COSTS_N_INSNS (4), /* fp_add */
158 COSTS_N_INSNS (4), /* fp_mult */
159 COSTS_N_INSNS (12), /* fp_div_sf */
160 COSTS_N_INSNS (15), /* fp_div_df */
161 COSTS_N_INSNS (7), /* int_mult_si */
162 COSTS_N_INSNS (7), /* int_mult_di */
163 COSTS_N_INSNS (1), /* int_shift */
164 COSTS_N_INSNS (2), /* int_cmov */
165 COSTS_N_INSNS (86), /* int_div */
169 /* Similar but tuned for code size instead of execution latency. The
170 extra +N is fractional cost tuning based on latency. It's used to
171 encourage use of cheaper insns like shift, but only if there's just one of them. */
174 static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
176 COSTS_N_INSNS (1), /* fp_add */
177 COSTS_N_INSNS (1), /* fp_mult */
178 COSTS_N_INSNS (1), /* fp_div_sf */
179 COSTS_N_INSNS (1) + 1, /* fp_div_df */
180 COSTS_N_INSNS (1) + 1, /* int_mult_si */
181 COSTS_N_INSNS (1) + 2, /* int_mult_di */
182 COSTS_N_INSNS (1), /* int_shift */
183 COSTS_N_INSNS (1), /* int_cmov */
184 COSTS_N_INSNS (6), /* int_div */
187 /* Get the number of args of a function in one of two ways. */
188 #if TARGET_ABI_OPEN_VMS || TARGET_ABI_UNICOSMK
189 #define NUM_ARGS crtl->args.info.num_args
191 #define NUM_ARGS crtl->args.info
197 /* Declarations of static functions. */
198 static struct machine_function *alpha_init_machine_status (void);
199 static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
201 #if TARGET_ABI_OPEN_VMS
202 static void alpha_write_linkage (FILE *, const char *, tree);
205 static void unicosmk_output_deferred_case_vectors (FILE *);
206 static void unicosmk_gen_dsib (unsigned long *);
207 static void unicosmk_output_ssib (FILE *, const char *);
208 static int unicosmk_need_dex (rtx);
210 /* Implement TARGET_HANDLE_OPTION. */
213 alpha_handle_option (size_t code, const char *arg, int value)
219 target_flags |= MASK_SOFT_FP;
223 case OPT_mieee_with_inexact:
224 target_flags |= MASK_IEEE_CONFORMANT;
228 if (value != 16 && value != 32 && value != 64)
229 error ("bad value %qs for -mtls-size switch", arg);
236 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
237 /* Implement TARGET_MANGLE_TYPE. */
240 alpha_mangle_type (const_tree type)
242 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
243 && TARGET_LONG_DOUBLE_128)
246 /* For all other types, use normal C++ mangling. */
251 /* Parse target option strings. */
254 override_options (void)
256 static const struct cpu_table {
257 const char *const name;
258 const enum processor_type processor;
261 { "ev4", PROCESSOR_EV4, 0 },
262 { "ev45", PROCESSOR_EV4, 0 },
263 { "21064", PROCESSOR_EV4, 0 },
264 { "ev5", PROCESSOR_EV5, 0 },
265 { "21164", PROCESSOR_EV5, 0 },
266 { "ev56", PROCESSOR_EV5, MASK_BWX },
267 { "21164a", PROCESSOR_EV5, MASK_BWX },
268 { "pca56", PROCESSOR_EV5, MASK_BWX|MASK_MAX },
269 { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX },
270 { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX },
271 { "ev6", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX },
272 { "21264", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX },
273 { "ev67", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX },
274 { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX }
277 int const ct_size = ARRAY_SIZE (cpu_table);
280 /* Unicos/Mk doesn't have shared libraries. */
281 if (TARGET_ABI_UNICOSMK && flag_pic)
283 warning (0, "-f%s ignored for Unicos/Mk (not supported)",
284 (flag_pic > 1) ? "PIC" : "pic");
288 /* On Unicos/Mk, the native compiler consistently generates /d suffixes for
289 floating-point instructions. Make that the default for this target. */
290 if (TARGET_ABI_UNICOSMK)
291 alpha_fprm = ALPHA_FPRM_DYN;
293 alpha_fprm = ALPHA_FPRM_NORM;
295 alpha_tp = ALPHA_TP_PROG;
296 alpha_fptm = ALPHA_FPTM_N;
298 /* We cannot use su and sui qualifiers for conversion instructions on
299 Unicos/Mk. I'm not sure if this is due to assembler or hardware
300 limitations. Right now, we issue a warning if -mieee is specified
301 and then ignore it; eventually, we should either get it right or
302 disable the option altogether. */
306 if (TARGET_ABI_UNICOSMK)
307 warning (0, "-mieee not supported on Unicos/Mk");
310 alpha_tp = ALPHA_TP_INSN;
311 alpha_fptm = ALPHA_FPTM_SU;
315 if (TARGET_IEEE_WITH_INEXACT)
317 if (TARGET_ABI_UNICOSMK)
318 warning (0, "-mieee-with-inexact not supported on Unicos/Mk");
321 alpha_tp = ALPHA_TP_INSN;
322 alpha_fptm = ALPHA_FPTM_SUI;
328 if (! strcmp (alpha_tp_string, "p"))
329 alpha_tp = ALPHA_TP_PROG;
330 else if (! strcmp (alpha_tp_string, "f"))
331 alpha_tp = ALPHA_TP_FUNC;
332 else if (! strcmp (alpha_tp_string, "i"))
333 alpha_tp = ALPHA_TP_INSN;
335 error ("bad value %qs for -mtrap-precision switch", alpha_tp_string);
338 if (alpha_fprm_string)
340 if (! strcmp (alpha_fprm_string, "n"))
341 alpha_fprm = ALPHA_FPRM_NORM;
342 else if (! strcmp (alpha_fprm_string, "m"))
343 alpha_fprm = ALPHA_FPRM_MINF;
344 else if (! strcmp (alpha_fprm_string, "c"))
345 alpha_fprm = ALPHA_FPRM_CHOP;
346 else if (! strcmp (alpha_fprm_string,"d"))
347 alpha_fprm = ALPHA_FPRM_DYN;
349 error ("bad value %qs for -mfp-rounding-mode switch",
353 if (alpha_fptm_string)
355 if (strcmp (alpha_fptm_string, "n") == 0)
356 alpha_fptm = ALPHA_FPTM_N;
357 else if (strcmp (alpha_fptm_string, "u") == 0)
358 alpha_fptm = ALPHA_FPTM_U;
359 else if (strcmp (alpha_fptm_string, "su") == 0)
360 alpha_fptm = ALPHA_FPTM_SU;
361 else if (strcmp (alpha_fptm_string, "sui") == 0)
362 alpha_fptm = ALPHA_FPTM_SUI;
364 error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string);
367 if (alpha_cpu_string)
369 for (i = 0; i < ct_size; i++)
370 if (! strcmp (alpha_cpu_string, cpu_table [i].name))
372 alpha_tune = alpha_cpu = cpu_table [i].processor;
373 target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
374 target_flags |= cpu_table [i].flags;
378 error ("bad value %qs for -mcpu switch", alpha_cpu_string);
381 if (alpha_tune_string)
383 for (i = 0; i < ct_size; i++)
384 if (! strcmp (alpha_tune_string, cpu_table [i].name))
386 alpha_tune = cpu_table [i].processor;
390 error ("bad value %qs for -mcpu switch", alpha_tune_string);
393 /* Do some sanity checks on the above options. */
395 if (TARGET_ABI_UNICOSMK && alpha_fptm != ALPHA_FPTM_N)
397 warning (0, "trap mode not supported on Unicos/Mk");
398 alpha_fptm = ALPHA_FPTM_N;
401 if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
402 && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
404 warning (0, "fp software completion requires -mtrap-precision=i");
405 alpha_tp = ALPHA_TP_INSN;
408 if (alpha_cpu == PROCESSOR_EV6)
410 /* Except for EV6 pass 1 (not released), we always have precise
411 arithmetic traps. Which means we can do software completion
412 without minding trap shadows. */
413 alpha_tp = ALPHA_TP_PROG;
416 if (TARGET_FLOAT_VAX)
418 if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
420 warning (0, "rounding mode not supported for VAX floats");
421 alpha_fprm = ALPHA_FPRM_NORM;
423 if (alpha_fptm == ALPHA_FPTM_SUI)
425 warning (0, "trap mode not supported for VAX floats");
426 alpha_fptm = ALPHA_FPTM_SU;
428 if (target_flags_explicit & MASK_LONG_DOUBLE_128)
429 warning (0, "128-bit long double not supported for VAX floats");
430 target_flags &= ~MASK_LONG_DOUBLE_128;
437 if (!alpha_mlat_string)
438 alpha_mlat_string = "L1";
440 if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
441 && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
443 else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
444 && ISDIGIT ((unsigned char)alpha_mlat_string[1])
445 && alpha_mlat_string[2] == '\0')
447 static int const cache_latency[][4] =
449 { 3, 30, -1 }, /* ev4 -- Bcache is a guess */
450 { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
451 { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */
454 lat = alpha_mlat_string[1] - '0';
455 if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
457 warning (0, "L%d cache latency unknown for %s",
458 lat, alpha_cpu_name[alpha_tune]);
462 lat = cache_latency[alpha_tune][lat-1];
464 else if (! strcmp (alpha_mlat_string, "main"))
466 /* Most current memories have about 370ns latency. This is
467 a reasonable guess for a fast cpu. */
472 warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string);
476 alpha_memory_latency = lat;
479 /* Default the definition of "small data" to 8 bytes. */
483 /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */
485 target_flags |= MASK_SMALL_DATA;
486 else if (flag_pic == 2)
487 target_flags &= ~MASK_SMALL_DATA;
489 /* Align labels and loops for optimal branching. */
490 /* ??? Kludge these by not doing anything if we don't optimize and also if
491 we are writing ECOFF symbols to work around a bug in DEC's assembler. */
492 if (optimize > 0 && write_symbols != SDB_DEBUG)
494 if (align_loops <= 0)
496 if (align_jumps <= 0)
499 if (align_functions <= 0)
500 align_functions = 16;
502 /* Acquire a unique set number for our register saves and restores. */
503 alpha_sr_alias_set = new_alias_set ();
505 /* Register variables and functions with the garbage collector. */
507 /* Set up function hooks. */
508 init_machine_status = alpha_init_machine_status;
510 /* Tell the compiler when we're using VAX floating point. */
511 if (TARGET_FLOAT_VAX)
513 REAL_MODE_FORMAT (SFmode) = &vax_f_format;
514 REAL_MODE_FORMAT (DFmode) = &vax_g_format;
515 REAL_MODE_FORMAT (TFmode) = NULL;
518 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
519 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
520 target_flags |= MASK_LONG_DOUBLE_128;
523 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
524 can be optimized to ap = __builtin_next_arg (0). */
525 if (TARGET_ABI_UNICOSMK)
526 targetm.expand_builtin_va_start = NULL;
529 /* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */
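/* For example, 0x00000000ffffff00 qualifies (every byte is 0x00 or 0xff),
   while 0x0000000000000180 does not (its low bytes are 0x80 and 0x01). */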
532 zap_mask (HOST_WIDE_INT value)
536 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
538 if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
544 /* Return true if OP is valid for a particular TLS relocation.
545 We are already guaranteed that OP is a CONST. */
548 tls_symbolic_operand_1 (rtx op, int size, int unspec)
552 if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
554 op = XVECEXP (op, 0, 0);
556 if (GET_CODE (op) != SYMBOL_REF)
559 switch (SYMBOL_REF_TLS_MODEL (op))
561 case TLS_MODEL_LOCAL_DYNAMIC:
562 return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
563 case TLS_MODEL_INITIAL_EXEC:
564 return unspec == UNSPEC_TPREL && size == 64;
565 case TLS_MODEL_LOCAL_EXEC:
566 return unspec == UNSPEC_TPREL && size == alpha_tls_size;
572 /* Used by aligned_memory_operand and unaligned_memory_operand to
573 resolve what reload is going to do with OP if it's a register. */
576 resolve_reload_operand (rtx op)
578 if (reload_in_progress)
581 if (GET_CODE (tmp) == SUBREG)
582 tmp = SUBREG_REG (tmp);
584 && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
586 op = reg_equiv_memory_loc[REGNO (tmp)];
594 /* The set of scalar modes supported differs from the default check-what-c-supports
595 version in that sometimes TFmode is available even when long double
596 indicates only DFmode. On unicosmk, we have the situation that HImode
597 doesn't map to any C type, but of course we still support that. */
600 alpha_scalar_mode_supported_p (enum machine_mode mode)
608 case TImode: /* via optabs.c */
616 return TARGET_HAS_XFLOATING_LIBS;
623 /* Alpha implements a couple of integer vector mode operations when
624 TARGET_MAX is enabled. We do not check TARGET_MAX here, however,
625 which allows the vectorizer to operate on e.g. move instructions,
626 or when expand_vector_operations can do something useful. */
629 alpha_vector_mode_supported_p (enum machine_mode mode)
631 return mode == V8QImode || mode == V4HImode || mode == V2SImode;
634 /* Return 1 if this function can directly return via $26. */
639 return (! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK
641 && alpha_sa_size () == 0
642 && get_frame_size () == 0
643 && crtl->outgoing_args_size == 0
644 && crtl->args.pretend_args_size == 0);
647 /* Return the ADDR_VEC associated with a tablejump insn. */
650 alpha_tablejump_addr_vec (rtx insn)
654 tmp = JUMP_LABEL (insn);
657 tmp = NEXT_INSN (tmp);
661 && GET_CODE (PATTERN (tmp)) == ADDR_DIFF_VEC)
662 return PATTERN (tmp);
666 /* Return the label of the predicted edge, or CONST0_RTX if we don't know. */
669 alpha_tablejump_best_label (rtx insn)
671 rtx jump_table = alpha_tablejump_addr_vec (insn);
672 rtx best_label = NULL_RTX;
674 /* ??? Once the CFG doesn't keep getting completely rebuilt, look
675 there for edge frequency counts from profile data. */
679 int n_labels = XVECLEN (jump_table, 1);
683 for (i = 0; i < n_labels; i++)
687 for (j = i + 1; j < n_labels; j++)
688 if (XEXP (XVECEXP (jump_table, 1, i), 0)
689 == XEXP (XVECEXP (jump_table, 1, j), 0))
692 if (count > best_count)
693 best_count = count, best_label = XVECEXP (jump_table, 1, i);
697 return best_label ? best_label : const0_rtx;
700 /* Return the TLS model to use for SYMBOL. */
702 static enum tls_model
703 tls_symbolic_operand_type (rtx symbol)
705 enum tls_model model;
707 if (GET_CODE (symbol) != SYMBOL_REF)
708 return TLS_MODEL_NONE;
709 model = SYMBOL_REF_TLS_MODEL (symbol);
711 /* Local-exec with a 64-bit size is the same code as initial-exec. */
712 if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
713 model = TLS_MODEL_INITIAL_EXEC;
718 /* Return true if the function DECL will share the same GP as any
719 function in the current unit of translation. */
722 decl_has_samegp (const_tree decl)
724 /* Functions that are not local can be overridden, and thus may
725 not share the same gp. */
726 if (!(*targetm.binds_local_p) (decl))
729 /* If -msmall-data is in effect, assume that there is only one GP
730 for the module, and so any local symbol has this property. We
731 need explicit relocations to be able to enforce this for symbols
732 not defined in this unit of translation, however. */
733 if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
736 /* Functions that are not external are defined in this UoT. */
737 /* ??? Irritatingly, static functions not yet emitted are still
738 marked "external". Apply this to non-static functions only. */
739 return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
742 /* Return true if EXP should be placed in the small data section. */
745 alpha_in_small_data_p (const_tree exp)
747 /* We want to merge strings, so we never consider them small data. */
748 if (TREE_CODE (exp) == STRING_CST)
751 /* Functions are never in the small data area. Duh. */
752 if (TREE_CODE (exp) == FUNCTION_DECL)
755 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
757 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
758 if (strcmp (section, ".sdata") == 0
759 || strcmp (section, ".sbss") == 0)
764 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
766 /* If this is an incomplete type with size 0, then we can't put it
767 in sdata because it might be too big when completed. */
768 if (size > 0 && (unsigned HOST_WIDE_INT) size <= g_switch_value)
775 #if TARGET_ABI_OPEN_VMS
777 alpha_linkage_symbol_p (const char *symname)
779 int symlen = strlen (symname);
782 return strcmp (&symname [symlen - 4], "..lk") == 0;
787 #define LINKAGE_SYMBOL_REF_P(X) \
788 ((GET_CODE (X) == SYMBOL_REF \
789 && alpha_linkage_symbol_p (XSTR (X, 0))) \
790 || (GET_CODE (X) == CONST \
791 && GET_CODE (XEXP (X, 0)) == PLUS \
792 && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
793 && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
796 /* legitimate_address_p recognizes an RTL expression that is a valid
797 memory address for an instruction. The MODE argument is the
798 machine mode for the MEM expression that wants to use this address.
800 For Alpha, we have either a constant address or the sum of a
801 register and a constant address, or just a register. For DImode,
802 any of those forms can be surrounded with an AND that clears the
803 low-order three bits; this is an "unaligned" access. */
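/* For example, (and (plus (reg) (const_int 5)) (const_int -8)) is the
   form of address used by the ldq_u/stq_u unaligned access patterns. */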
806 alpha_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
808 /* If this is an ldq_u type address, discard the outer AND. */
810 && GET_CODE (x) == AND
811 && CONST_INT_P (XEXP (x, 1))
812 && INTVAL (XEXP (x, 1)) == -8)
815 /* Discard non-paradoxical subregs. */
816 if (GET_CODE (x) == SUBREG
817 && (GET_MODE_SIZE (GET_MODE (x))
818 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
821 /* Unadorned general registers are valid. */
824 ? STRICT_REG_OK_FOR_BASE_P (x)
825 : NONSTRICT_REG_OK_FOR_BASE_P (x)))
828 /* Constant addresses (i.e. +/- 32k) are valid. */
829 if (CONSTANT_ADDRESS_P (x))
832 #if TARGET_ABI_OPEN_VMS
833 if (LINKAGE_SYMBOL_REF_P (x))
837 /* Register plus a small constant offset is valid. */
838 if (GET_CODE (x) == PLUS)
840 rtx ofs = XEXP (x, 1);
843 /* Discard non-paradoxical subregs. */
844 if (GET_CODE (x) == SUBREG
845 && (GET_MODE_SIZE (GET_MODE (x))
846 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
852 && NONSTRICT_REG_OK_FP_BASE_P (x)
853 && CONST_INT_P (ofs))
856 ? STRICT_REG_OK_FOR_BASE_P (x)
857 : NONSTRICT_REG_OK_FOR_BASE_P (x))
858 && CONSTANT_ADDRESS_P (ofs))
863 /* If we're managing explicit relocations, LO_SUM is valid, as are small
864 data symbols. Avoid explicit relocations of modes larger than word
865 mode since e.g. $LC0+8($1) can fold around +/- 32k offset. */
866 else if (TARGET_EXPLICIT_RELOCS
867 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
869 if (small_symbolic_operand (x, Pmode))
872 if (GET_CODE (x) == LO_SUM)
874 rtx ofs = XEXP (x, 1);
877 /* Discard non-paradoxical subregs. */
878 if (GET_CODE (x) == SUBREG
879 && (GET_MODE_SIZE (GET_MODE (x))
880 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
883 /* Must have a valid base register. */
886 ? STRICT_REG_OK_FOR_BASE_P (x)
887 : NONSTRICT_REG_OK_FOR_BASE_P (x))))
890 /* The symbol must be local. */
891 if (local_symbolic_operand (ofs, Pmode)
892 || dtp32_symbolic_operand (ofs, Pmode)
893 || tp32_symbolic_operand (ofs, Pmode))
901 /* Build the SYMBOL_REF for __tls_get_addr. */
903 static GTY(()) rtx tls_get_addr_libfunc;
906 get_tls_get_addr (void)
908 if (!tls_get_addr_libfunc)
909 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
910 return tls_get_addr_libfunc;
913 /* Try machine-dependent ways of modifying an illegitimate address
914 to be legitimate. If we find one, return the new, valid address. */
917 alpha_legitimize_address_1 (rtx x, rtx scratch, enum machine_mode mode)
919 HOST_WIDE_INT addend;
921 /* If the address is (plus reg const_int) and the CONST_INT is not a
922 valid offset, compute the high part of the constant and add it to
923 the register. Then our address is (plus temp low-part-const). */
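/* Illustrative example: (plus reg 0x12340) is rewritten as
   temp = reg + 0x10000 followed by the address (plus temp 0x2340). */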
924 if (GET_CODE (x) == PLUS
925 && REG_P (XEXP (x, 0))
926 && CONST_INT_P (XEXP (x, 1))
927 && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
929 addend = INTVAL (XEXP (x, 1));
934 /* If the address is (const (plus FOO const_int)), find the low-order
935 part of the CONST_INT. Then load FOO plus any high-order part of the
936 CONST_INT into a register. Our address is (plus reg low-part-const).
937 This is done to reduce the number of GOT entries. */
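/* Illustrative example: (const (plus FOO 0x12345)) is handled by loading
   FOO+0x10000 into a register and using (plus reg 0x2345) as the address,
   so nearby offsets from FOO can share one GOT entry. */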
938 if (can_create_pseudo_p ()
939 && GET_CODE (x) == CONST
940 && GET_CODE (XEXP (x, 0)) == PLUS
941 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
943 addend = INTVAL (XEXP (XEXP (x, 0), 1));
944 x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
948 /* If we have a (plus reg const), emit the load as in (2), then add
949 the two registers, and finally generate (plus reg low-part-const) as
951 if (can_create_pseudo_p ()
952 && GET_CODE (x) == PLUS
953 && REG_P (XEXP (x, 0))
954 && GET_CODE (XEXP (x, 1)) == CONST
955 && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
956 && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
958 addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
959 x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
960 XEXP (XEXP (XEXP (x, 1), 0), 0),
961 NULL_RTX, 1, OPTAB_LIB_WIDEN);
965 /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
966 Avoid modes larger than word mode since e.g. $LC0+8($1) can fold
967 around +/- 32k offset. */
968 if (TARGET_EXPLICIT_RELOCS
969 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
970 && symbolic_operand (x, Pmode))
972 rtx r0, r16, eqv, tga, tp, insn, dest, seq;
974 switch (tls_symbolic_operand_type (x))
979 case TLS_MODEL_GLOBAL_DYNAMIC:
982 r0 = gen_rtx_REG (Pmode, 0);
983 r16 = gen_rtx_REG (Pmode, 16);
984 tga = get_tls_get_addr ();
985 dest = gen_reg_rtx (Pmode);
986 seq = GEN_INT (alpha_next_sequence_number++);
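/* The same sequence number is used on the tlsgd address load and on the
   call that consumes it, so the assembler can associate the pair. */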
988 emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
989 insn = gen_call_value_osf_tlsgd (r0, tga, seq);
990 insn = emit_call_insn (insn);
991 RTL_CONST_CALL_P (insn) = 1;
992 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
997 emit_libcall_block (insn, dest, r0, x);
1000 case TLS_MODEL_LOCAL_DYNAMIC:
1003 r0 = gen_rtx_REG (Pmode, 0);
1004 r16 = gen_rtx_REG (Pmode, 16);
1005 tga = get_tls_get_addr ();
1006 scratch = gen_reg_rtx (Pmode);
1007 seq = GEN_INT (alpha_next_sequence_number++);
1009 emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
1010 insn = gen_call_value_osf_tlsldm (r0, tga, seq);
1011 insn = emit_call_insn (insn);
1012 RTL_CONST_CALL_P (insn) = 1;
1013 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1015 insn = get_insns ();
1018 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1019 UNSPEC_TLSLDM_CALL);
1020 emit_libcall_block (insn, scratch, r0, eqv);
1022 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
1023 eqv = gen_rtx_CONST (Pmode, eqv);
1025 if (alpha_tls_size == 64)
1027 dest = gen_reg_rtx (Pmode);
1028 emit_insn (gen_rtx_SET (VOIDmode, dest, eqv));
1029 emit_insn (gen_adddi3 (dest, dest, scratch));
1032 if (alpha_tls_size == 32)
1034 insn = gen_rtx_HIGH (Pmode, eqv);
1035 insn = gen_rtx_PLUS (Pmode, scratch, insn);
1036 scratch = gen_reg_rtx (Pmode);
1037 emit_insn (gen_rtx_SET (VOIDmode, scratch, insn));
1039 return gen_rtx_LO_SUM (Pmode, scratch, eqv);
1041 case TLS_MODEL_INITIAL_EXEC:
1042 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1043 eqv = gen_rtx_CONST (Pmode, eqv);
1044 tp = gen_reg_rtx (Pmode);
1045 scratch = gen_reg_rtx (Pmode);
1046 dest = gen_reg_rtx (Pmode);
1048 emit_insn (gen_load_tp (tp));
1049 emit_insn (gen_rtx_SET (VOIDmode, scratch, eqv));
1050 emit_insn (gen_adddi3 (dest, tp, scratch));
1053 case TLS_MODEL_LOCAL_EXEC:
1054 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1055 eqv = gen_rtx_CONST (Pmode, eqv);
1056 tp = gen_reg_rtx (Pmode);
1058 emit_insn (gen_load_tp (tp));
1059 if (alpha_tls_size == 32)
1061 insn = gen_rtx_HIGH (Pmode, eqv);
1062 insn = gen_rtx_PLUS (Pmode, tp, insn);
1063 tp = gen_reg_rtx (Pmode);
1064 emit_insn (gen_rtx_SET (VOIDmode, tp, insn));
1066 return gen_rtx_LO_SUM (Pmode, tp, eqv);
1072 if (local_symbolic_operand (x, Pmode))
1074 if (small_symbolic_operand (x, Pmode))
1078 if (can_create_pseudo_p ())
1079 scratch = gen_reg_rtx (Pmode);
1080 emit_insn (gen_rtx_SET (VOIDmode, scratch,
1081 gen_rtx_HIGH (Pmode, x)));
1082 return gen_rtx_LO_SUM (Pmode, scratch, x);
1091 HOST_WIDE_INT low, high;
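/* The masks below sign-extend a field: ((v & 0xffff) ^ 0x8000) - 0x8000
   sign-extends the low 16 bits of V (e.g. 0x9000 yields -0x7000), and the
   32-bit form works likewise. */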
1093 low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
1095 high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
1099 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
1100 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1101 1, OPTAB_LIB_WIDEN);
1103 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
1104 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1105 1, OPTAB_LIB_WIDEN);
1107 return plus_constant (x, low);
1112 /* Try machine-dependent ways of modifying an illegitimate address
1113 to be legitimate. Return X or the new, valid address. */
1116 alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1117 enum machine_mode mode)
1119 rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
1120 return new_x ? new_x : x;
1123 /* Primarily this is required for TLS symbols, but given that our move
1124 patterns *ought* to be able to handle any symbol at any time, we
1125 should never be spilling symbolic operands to the constant pool, ever. */
1128 alpha_cannot_force_const_mem (rtx x)
1130 enum rtx_code code = GET_CODE (x);
1131 return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
1134 /* We do not allow indirect calls to be optimized into sibling calls, nor
1135 can we allow a call to a function with a different GP to be optimized
1139 alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1141 /* Can't do indirect tail calls, since we don't know if the target
1142 uses the same GP. */
1146 /* Otherwise, we can make a tail call if the target function shares
1148 return decl_has_samegp (decl);
1152 some_small_symbolic_operand_int (rtx *px, void *data ATTRIBUTE_UNUSED)
1156 /* Don't re-split. */
1157 if (GET_CODE (x) == LO_SUM)
1160 return small_symbolic_operand (x, Pmode) != 0;
1164 split_small_symbolic_operand_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
1168 /* Don't re-split. */
1169 if (GET_CODE (x) == LO_SUM)
1172 if (small_symbolic_operand (x, Pmode))
1174 x = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
1183 split_small_symbolic_operand (rtx x)
1186 for_each_rtx (&x, split_small_symbolic_operand_1, NULL);
1190 /* Indicate that INSN cannot be duplicated. This is true for any insn
1191 that we've marked with gpdisp relocs, since those have to stay in
1192 1-1 correspondence with one another.
1194 Technically we could copy them if we could set up a mapping from one
1195 sequence number to another, across the set of insns to be duplicated.
1196 This seems overly complicated and error-prone since interblock motion
1197 from sched-ebb could move one of the pair of insns to a different block.
1199 Also cannot allow jsr insns to be duplicated. If they throw exceptions,
1200 then they'll be in a different block from their ldgp. Which could lead
1201 the bb reorder code to think that it would be ok to copy just the block
1202 containing the call and branch to the block containing the ldgp. */
1205 alpha_cannot_copy_insn_p (rtx insn)
1207 if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
1209 if (recog_memoized (insn) >= 0)
1210 return get_attr_cannot_copy (insn);
1216 /* Try a machine-dependent way of reloading an illegitimate address
1217 operand. If we find one, push the reload and return the new rtx. */
1220 alpha_legitimize_reload_address (rtx x,
1221 enum machine_mode mode ATTRIBUTE_UNUSED,
1222 int opnum, int type,
1223 int ind_levels ATTRIBUTE_UNUSED)
1225 /* We must recognize output that we have already generated ourselves. */
1226 if (GET_CODE (x) == PLUS
1227 && GET_CODE (XEXP (x, 0)) == PLUS
1228 && REG_P (XEXP (XEXP (x, 0), 0))
1229 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
1230 && CONST_INT_P (XEXP (x, 1)))
1232 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1233 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1234 opnum, (enum reload_type) type);
1238 /* We wish to handle large displacements off a base register by
1239 splitting the addend across an ldah and the mem insn. This
1240 cuts the number of extra insns needed from 3 to 1. */
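/* Illustrative example: a displacement of 0x12340 becomes an ldah that
   adds 0x10000 to the base register, leaving a 0x2340 displacement in
   the mem itself. */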
1241 if (GET_CODE (x) == PLUS
1242 && REG_P (XEXP (x, 0))
1243 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
1244 && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
1245 && GET_CODE (XEXP (x, 1)) == CONST_INT)
1247 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
1248 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
1250 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
1252 /* Check for 32-bit overflow. */
1253 if (high + low != val)
1256 /* Reload the high part into a base reg; leave the low part
1257 in the mem directly. */
1258 x = gen_rtx_PLUS (GET_MODE (x),
1259 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
1263 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1264 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1265 opnum, (enum reload_type) type);
1272 /* Compute a (partial) cost for rtx X. Return true if the complete
1273 cost has been computed, and false if subexpressions should be
1274 scanned. In either case, *TOTAL contains the cost result. */
1277 alpha_rtx_costs (rtx x, int code, int outer_code, int *total,
1280 enum machine_mode mode = GET_MODE (x);
1281 bool float_mode_p = FLOAT_MODE_P (mode);
1282 const struct alpha_rtx_cost_data *cost_data;
1285 cost_data = &alpha_rtx_cost_size;
1287 cost_data = &alpha_rtx_cost_data[alpha_tune];
1292 /* If this is an 8-bit constant, return zero since it can be used
1293 nearly anywhere with no cost. If it is a valid operand for an
1294 ADD or AND, likewise return 0 if we know it will be used in that
1295 context. Otherwise, return 2 since it might be used there later.
1296 All other constants take at least two insns. */
1297 if (INTVAL (x) >= 0 && INTVAL (x) < 256)
1305 if (x == CONST0_RTX (mode))
1307 else if ((outer_code == PLUS && add_operand (x, VOIDmode))
1308 || (outer_code == AND && and_operand (x, VOIDmode)))
1310 else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
1313 *total = COSTS_N_INSNS (2);
1319 if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
1320 *total = COSTS_N_INSNS (outer_code != MEM);
1321 else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
1322 *total = COSTS_N_INSNS (1 + (outer_code != MEM));
1323 else if (tls_symbolic_operand_type (x))
1324 /* Estimate of cost for call_pal rduniq. */
1325 /* ??? How many insns do we emit here? More than one... */
1326 *total = COSTS_N_INSNS (15);
1328 /* Otherwise we do a load from the GOT. */
1329 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1333 /* This is effectively an add_operand. */
1340 *total = cost_data->fp_add;
1341 else if (GET_CODE (XEXP (x, 0)) == MULT
1342 && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
1344 *total = (rtx_cost (XEXP (XEXP (x, 0), 0),
1345 (enum rtx_code) outer_code, speed)
1346 + rtx_cost (XEXP (x, 1),
1347 (enum rtx_code) outer_code, speed)
1348 + COSTS_N_INSNS (1));
1355 *total = cost_data->fp_mult;
1356 else if (mode == DImode)
1357 *total = cost_data->int_mult_di;
1359 *total = cost_data->int_mult_si;
1363 if (CONST_INT_P (XEXP (x, 1))
1364 && INTVAL (XEXP (x, 1)) <= 3)
1366 *total = COSTS_N_INSNS (1);
1373 *total = cost_data->int_shift;
1378 *total = cost_data->fp_add;
1380 *total = cost_data->int_cmov;
1388 *total = cost_data->int_div;
1389 else if (mode == SFmode)
1390 *total = cost_data->fp_div_sf;
1392 *total = cost_data->fp_div_df;
1396 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1402 *total = COSTS_N_INSNS (1);
1410 *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
1416 case UNSIGNED_FLOAT:
1419 case FLOAT_TRUNCATE:
1420 *total = cost_data->fp_add;
1424 if (MEM_P (XEXP (x, 0)))
1427 *total = cost_data->fp_add;
1435 /* REF is an alignable memory location. Place an aligned SImode
1436 reference into *PALIGNED_MEM and the number of bits to shift into
1437 *PBITNUM. */
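/* Illustrative example: a QImode reference at byte displacement 5 from an
   aligned base register yields an SImode access at displacement 4 with
   *PBITNUM = 8 on a little-endian target. */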
1441 get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
1444 HOST_WIDE_INT disp, offset;
1446 gcc_assert (MEM_P (ref));
1448 if (reload_in_progress
1449 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1451 base = find_replacement (&XEXP (ref, 0));
1452 gcc_assert (memory_address_p (GET_MODE (ref), base));
1455 base = XEXP (ref, 0);
1457 if (GET_CODE (base) == PLUS)
1458 disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1462 /* Find the byte offset within an aligned word. If the memory itself is
1463 claimed to be aligned, believe it. Otherwise, aligned_memory_operand
1464 will have examined the base register and determined it is aligned, and
1465 thus displacements from it are naturally alignable. */
1466 if (MEM_ALIGN (ref) >= 32)
1471 /* Access the entire aligned word. */
1472 *paligned_mem = widen_memory_access (ref, SImode, -offset);
1474 /* Convert the byte offset within the word to a bit offset. */
1475 if (WORDS_BIG_ENDIAN)
1476 offset = 32 - (GET_MODE_BITSIZE (GET_MODE (ref)) + offset * 8);
1479 *pbitnum = GEN_INT (offset);
1482 /* Similar, but just get the address. Handle the two reload cases. */
1486 get_unaligned_address (rtx ref)
1489 HOST_WIDE_INT offset = 0;
1491 gcc_assert (MEM_P (ref));
1493 if (reload_in_progress
1494 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1496 base = find_replacement (&XEXP (ref, 0));
1498 gcc_assert (memory_address_p (GET_MODE (ref), base));
1501 base = XEXP (ref, 0);
1503 if (GET_CODE (base) == PLUS)
1504 offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1506 return plus_constant (base, offset);
1509 /* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
1510 X is always returned in a register. */
1513 get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
1515 if (GET_CODE (addr) == PLUS)
1517 ofs += INTVAL (XEXP (addr, 1));
1518 addr = XEXP (addr, 0);
1521 return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
1522 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1525 /* On the Alpha, all (non-symbolic) constants except zero go into
1526 a floating-point register via memory. Note that we cannot
1527 return anything that is not a subset of RCLASS, and that some
1528 symbolic constants cannot be dropped to memory. */
1531 alpha_preferred_reload_class(rtx x, enum reg_class rclass)
1533 /* Zero is present in any register class. */
1534 if (x == CONST0_RTX (GET_MODE (x)))
1537 /* These sorts of constants we can easily drop to memory. */
1539 || GET_CODE (x) == CONST_DOUBLE
1540 || GET_CODE (x) == CONST_VECTOR)
1542 if (rclass == FLOAT_REGS)
1544 if (rclass == ALL_REGS)
1545 return GENERAL_REGS;
1549 /* All other kinds of constants should not (and in the case of HIGH
1550 cannot) be dropped to memory -- instead we use a GENERAL_REGS
1551 secondary reload. */
1553 return (rclass == ALL_REGS ? GENERAL_REGS : rclass);
1558 /* Inform reload about cases where moving X with a mode MODE to a register in
1559 RCLASS requires an extra scratch or immediate register. Return the class
1560 needed for the immediate register. */
1562 static enum reg_class
1563 alpha_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
1564 enum machine_mode mode, secondary_reload_info *sri)
1566 /* Loading and storing HImode or QImode values to and from memory
1567 usually requires a scratch register. */
1568 if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
1570 if (any_memory_operand (x, mode))
1574 if (!aligned_memory_operand (x, mode))
1575 sri->icode = reload_in_optab[mode];
1578 sri->icode = reload_out_optab[mode];
1583 /* We also cannot do integral arithmetic into FP regs, as might result
1584 from register elimination into a DImode fp register. */
1585 if (rclass == FLOAT_REGS)
1587 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
1588 return GENERAL_REGS;
1589 if (in_p && INTEGRAL_MODE_P (mode)
1590 && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
1591 return GENERAL_REGS;
1597 /* Subfunction of the following function. Update the flags of any MEM
1598 found in part of X. */
1601 alpha_set_memflags_1 (rtx *xp, void *data)
1603 rtx x = *xp, orig = (rtx) data;
1608 MEM_VOLATILE_P (x) = MEM_VOLATILE_P (orig);
1609 MEM_IN_STRUCT_P (x) = MEM_IN_STRUCT_P (orig);
1610 MEM_SCALAR_P (x) = MEM_SCALAR_P (orig);
1611 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (orig);
1612 MEM_READONLY_P (x) = MEM_READONLY_P (orig);
1614 /* Sadly, we cannot use alias sets because the extra aliasing
1615 produced by the AND interferes. Given that two-byte quantities
1616 are the only thing we would be able to differentiate anyway,
1617 there does not seem to be any point in convoluting the early
1618 out of the alias check. */
1623 /* Given SEQ, which is an INSN list, look for any MEMs in either
1624 a SET_DEST or a SET_SRC and copy the volatile, in-struct, scalar,
1625 no-trap, and read-only flags from REF into each of the MEMs found. If REF is not
1626 a MEM, don't do anything. */
1629 alpha_set_memflags (rtx seq, rtx ref)
1636 /* This is only called from alpha.md, after having had something
1637 generated from one of the insn patterns. So if everything is
1638 zero, the pattern is already up-to-date. */
1639 if (!MEM_VOLATILE_P (ref)
1640 && !MEM_IN_STRUCT_P (ref)
1641 && !MEM_SCALAR_P (ref)
1642 && !MEM_NOTRAP_P (ref)
1643 && !MEM_READONLY_P (ref))
1646 for (insn = seq; insn; insn = NEXT_INSN (insn))
1648 for_each_rtx (&PATTERN (insn), alpha_set_memflags_1, (void *) ref);
1653 static rtx alpha_emit_set_const (rtx, enum machine_mode, HOST_WIDE_INT,
1656 /* Internal routine for alpha_emit_set_const to check for N or below insns.
1657 If NO_OUTPUT is true, then we only check to see if N insns are possible,
1658 and return pc_rtx if successful. */
1661 alpha_emit_set_const_1 (rtx target, enum machine_mode mode,
1662 HOST_WIDE_INT c, int n, bool no_output)
1664 HOST_WIDE_INT new_const;
1666 /* Use a pseudo if highly optimizing and still generating RTL. */
1668 = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
1671 /* If this is a sign-extended 32-bit constant, we can do this in at most
1672 three insns, so do it if we have enough insns left. We always have
1673 a sign-extended 32-bit constant when compiling on a narrow machine. */
1675 if (HOST_BITS_PER_WIDE_INT != 64
1676 || c >> 31 == -1 || c >> 31 == 0)
1678 HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
1679 HOST_WIDE_INT tmp1 = c - low;
1680 HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
1681 HOST_WIDE_INT extra = 0;
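/* Illustrative example: c = 0x12345678 gives low = 0x5678 and
   high = 0x1234, emitted as an LDAH of 0x1234 followed by an LDA
   of 0x5678. */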
1683 /* If HIGH will be interpreted as negative but the constant is
1684 positive, we must adjust it to do two ldah insns. */
1686 if ((high & 0x8000) != 0 && c >= 0)
1690 high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
1693 if (c == low || (low == 0 && extra == 0))
1695 /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
1696 but that meant that we can't handle INT_MIN on 32-bit machines
1697 (like NT/Alpha), because we recurse indefinitely through
1698 emit_move_insn to gen_movdi. So instead, since we know exactly
1699 what we want, create it explicitly. */
1704 target = gen_reg_rtx (mode);
1705 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (c)));
1708 else if (n >= 2 + (extra != 0))
1712 if (!can_create_pseudo_p ())
1714 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (high << 16)));
1718 temp = copy_to_suggested_reg (GEN_INT (high << 16),
1721 /* As of 2002-02-23, addsi3 is only available when not optimizing.
1722 This means that if we go through expand_binop, we'll try to
1723 generate extensions, etc, which will require new pseudos, which
1724 will fail during some split phases. The SImode add patterns
1725 still exist, but are not named. So build the insns by hand. */
1730 subtarget = gen_reg_rtx (mode);
1731 insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
1732 insn = gen_rtx_SET (VOIDmode, subtarget, insn);
1738 target = gen_reg_rtx (mode);
1739 insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1740 insn = gen_rtx_SET (VOIDmode, target, insn);
1746 /* If we couldn't do it that way, try some other methods. But if we have
1747 no instructions left, don't bother. Likewise, if this is SImode and
1748 we can't make pseudos, we can't do anything since the expand_binop
1749 and expand_unop calls will widen and try to make pseudos. */
1751 if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
1754 /* Next, see if we can load a related constant and then shift and possibly
1755 negate it to get the constant we want. Try this once for each
1756 increasing number of insns. */
1758 for (i = 1; i < n; i++)
1760 /* First, see if, minus some low bits, we have an easy load of the high bits. */
1763 new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
1766 temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
1771 return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
1772 target, 0, OPTAB_WIDEN);
1776 /* Next try complementing. */
1777 temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
1782 return expand_unop (mode, one_cmpl_optab, temp, target, 0);
1785 /* Next try to form a constant and do a left shift. We can do this
1786 if some low-order bits are zero; the exact_log2 call below tells
1787 us that information. The bits we are shifting out could be any
1788 value, but here we'll just try the 0- and sign-extended forms of
1789 the constant. To try to increase the chance of having the same
1790 constant in more than one insn, start at the highest number of
1791 bits to shift, but try all possibilities in case a ZAPNOT will be useful. */
1794 bits = exact_log2 (c & -c);
1796 for (; bits > 0; bits--)
1798 new_const = c >> bits;
1799 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1802 new_const = (unsigned HOST_WIDE_INT)c >> bits;
1803 temp = alpha_emit_set_const (subtarget, mode, new_const,
1810 return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
1811 target, 0, OPTAB_WIDEN);
1815 /* Now try high-order zero bits. Here we try the shifted-in bits as
1816 all zero and all ones. Be careful to avoid shifting outside the
1817 mode and to avoid shifting outside the host wide int size. */
1818 /* On narrow hosts, don't shift a 1 into the high bit, since we'll
1819 confuse the recursive call and set all of the high 32 bits. */
1821 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1822 - floor_log2 (c) - 1 - (HOST_BITS_PER_WIDE_INT < 64));
1824 for (; bits > 0; bits--)
1826 new_const = c << bits;
1827 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1830 new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1831 temp = alpha_emit_set_const (subtarget, mode, new_const,
1838 return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
1839 target, 1, OPTAB_WIDEN);
1843 /* Now try high-order 1 bits. We get that with a sign-extension.
1844 But one bit isn't enough here. Be careful to avoid shifting outside
1845 the mode and to avoid shifting outside the host wide int size. */
1847 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1848 - floor_log2 (~ c) - 2);
1850 for (; bits > 0; bits--)
1852 new_const = c << bits;
1853 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1856 new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1857 temp = alpha_emit_set_const (subtarget, mode, new_const,
1864 return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
1865 target, 0, OPTAB_WIDEN);
1870 #if HOST_BITS_PER_WIDE_INT == 64
1871 /* Finally, see if we can load a value into the target that is the same as the
1872 constant except that all bytes that are 0 are changed to be 0xff. If we
1873 can, then we can do a ZAPNOT to obtain the desired constant. */
1876 for (i = 0; i < 64; i += 8)
1877 if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
1878 new_const |= (HOST_WIDE_INT) 0xff << i;
1880 /* We are only called for SImode and DImode. If this is SImode, ensure that
1881 we are sign extended to a full word. */
1884 new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;
1888 temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
1893 return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
1894 target, 0, OPTAB_WIDEN);
1902 /* Try to output insns to set TARGET equal to the constant C if it can be
1903 done in less than N insns. Do all computations in MODE. Returns the place
1904 where the output has been placed if it can be done and the insns have been
1905 emitted. If it would take more than N insns, zero is returned and no
1906 insns and emitted. */
1909 alpha_emit_set_const (rtx target, enum machine_mode mode,
1910 HOST_WIDE_INT c, int n, bool no_output)
1912 enum machine_mode orig_mode = mode;
1913 rtx orig_target = target;
1917 /* If we can't make any pseudos, TARGET is an SImode hard register, and we
1918 can't load this constant in one insn, do this in DImode. */
1919 if (!can_create_pseudo_p () && mode == SImode
1920 && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
1922 result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
1926 target = no_output ? NULL : gen_lowpart (DImode, target);
1929 else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
1931 target = no_output ? NULL : gen_lowpart (DImode, target);
1935 /* Try 1 insn, then 2, then up to N. */
1936 for (i = 1; i <= n; i++)
1938 result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
1946 insn = get_last_insn ();
1947 set = single_set (insn);
1948 if (! CONSTANT_P (SET_SRC (set)))
1949 set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
1954 /* Allow for the case where we changed the mode of TARGET. */
1957 if (result == target)
1958 result = orig_target;
1959 else if (mode != orig_mode)
1960 result = gen_lowpart (orig_mode, result);
1966 /* Having failed to find a 3 insn sequence in alpha_emit_set_const,
1967 fall back to a straightforward decomposition. We do this to avoid
1968 exponential run times encountered when looking for longer sequences
1969 with alpha_emit_set_const. */
1972 alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
1974 HOST_WIDE_INT d1, d2, d3, d4;
1976 /* Decompose the entire word */
1977 #if HOST_BITS_PER_WIDE_INT >= 64
1978 gcc_assert (c2 == -(c1 < 0));
1979 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1981 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1982 c1 = (c1 - d2) >> 32;
1983 d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1985 d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1986 gcc_assert (c1 == d4);
1988 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1990 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1991 gcc_assert (c1 == d2);
1993 d3 = ((c2 & 0xffff) ^ 0x8000) - 0x8000;
1995 d4 = ((c2 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1996 gcc_assert (c2 == d4);
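/* Either way, the pieces satisfy c == ((d4 + d3) << 32) + d2 + d1,
   which is the order in which the value is reassembled below. */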
1999 /* Construct the high word */
2002 emit_move_insn (target, GEN_INT (d4));
2004 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
2007 emit_move_insn (target, GEN_INT (d3));
2009 /* Shift it into place */
2010 emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
2012 /* Add in the low bits. */
2014 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
2016 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
2021 /* Given an integral CONST_INT, CONST_DOUBLE, or CONST_VECTOR, return the constant's low 64 bits in *P0 and its high bits in *P1. */
2025 alpha_extract_integer (rtx x, HOST_WIDE_INT *p0, HOST_WIDE_INT *p1)
2027 HOST_WIDE_INT i0, i1;
2029 if (GET_CODE (x) == CONST_VECTOR)
2030 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
2033 if (CONST_INT_P (x))
2038 else if (HOST_BITS_PER_WIDE_INT >= 64)
2040 i0 = CONST_DOUBLE_LOW (x);
2045 i0 = CONST_DOUBLE_LOW (x);
2046 i1 = CONST_DOUBLE_HIGH (x);
2053 /* Implement LEGITIMATE_CONSTANT_P. This is all constants for which we
2054 are willing to load the value into a register via a move pattern.
2055 Normally this is all symbolic constants, integral constants that
2056 take three or fewer instructions, and floating-point zero. */
2059 alpha_legitimate_constant_p (rtx x)
2061 enum machine_mode mode = GET_MODE (x);
2062 HOST_WIDE_INT i0, i1;
2064 switch (GET_CODE (x))
2071 if (GET_CODE (XEXP (x, 0)) == PLUS
2072 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
2073 x = XEXP (XEXP (x, 0), 0);
2077 if (GET_CODE (x) != SYMBOL_REF)
2083 /* TLS symbols are never valid. */
2084 return SYMBOL_REF_TLS_MODEL (x) == 0;
2087 if (x == CONST0_RTX (mode))
2089 if (FLOAT_MODE_P (mode))
2094 if (x == CONST0_RTX (mode))
2096 if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
2098 if (GET_MODE_SIZE (mode) != 8)
2104 if (TARGET_BUILD_CONSTANTS)
2106 alpha_extract_integer (x, &i0, &i1);
2107 if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == (-i0 < 0))
2108 return alpha_emit_set_const_1 (x, mode, i0, 3, true) != NULL;
2116 /* Operand 1 is known to be a constant, and should require more than one
2117 instruction to load. Emit that multi-part load. */
2120 alpha_split_const_mov (enum machine_mode mode, rtx *operands)
2122 HOST_WIDE_INT i0, i1;
2123 rtx temp = NULL_RTX;
2125 alpha_extract_integer (operands[1], &i0, &i1);
2127 if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0))
2128 temp = alpha_emit_set_const (operands[0], mode, i0, 3, false);
2130 if (!temp && TARGET_BUILD_CONSTANTS)
2131 temp = alpha_emit_set_long_const (operands[0], i0, i1);
2135 if (!rtx_equal_p (operands[0], temp))
2136 emit_move_insn (operands[0], temp);
2143 /* Expand a move instruction; return true if all work is done.
2144 We don't handle non-bwx subword loads here. */
2147 alpha_expand_mov (enum machine_mode mode, rtx *operands)
2151 /* If the output is not a register, the input must be. */
2152 if (MEM_P (operands[0])
2153 && ! reg_or_0_operand (operands[1], mode))
2154 operands[1] = force_reg (mode, operands[1]);
2156 /* Allow legitimize_address to perform some simplifications. */
2157 if (mode == Pmode && symbolic_operand (operands[1], mode))
2159 tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode);
2162 if (tmp == operands[0])
2169 /* Early out for non-constants and valid constants. */
2170 if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode))
2173 /* Split large integers. */
2174 if (CONST_INT_P (operands[1])
2175 || GET_CODE (operands[1]) == CONST_DOUBLE
2176 || GET_CODE (operands[1]) == CONST_VECTOR)
2178 if (alpha_split_const_mov (mode, operands))
2182 /* Otherwise we've nothing left but to drop the thing to memory. */
2183 tmp = force_const_mem (mode, operands[1]);
2185 if (tmp == NULL_RTX)
2188 if (reload_in_progress)
2190 emit_move_insn (operands[0], XEXP (tmp, 0));
2191 operands[1] = replace_equiv_address (tmp, operands[0]);
2194 operands[1] = validize_mem (tmp);
2198 /* Expand a non-bwx QImode or HImode move instruction;
2199 return true if all work is done. */
2202 alpha_expand_mov_nobwx (enum machine_mode mode, rtx *operands)
2206 /* If the output is not a register, the input must be. */
2207 if (MEM_P (operands[0]))
2208 operands[1] = force_reg (mode, operands[1]);
2210 /* Handle four memory cases, unaligned and aligned for either the input
2211 or the output. The only case where we can be called during reload is
2212 for aligned loads; all other cases require temporaries. */
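/* In the aligned cases the subword is accessed by widening to an SImode
   reference (see get_aligned_mem); the unaligned cases go through the
   ldq_u-based unaligned load/store patterns using DImode temporaries. */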
2214 if (any_memory_operand (operands[1], mode))
2216 if (aligned_memory_operand (operands[1], mode))
2218 if (reload_in_progress)
2221 seq = gen_reload_inqi_aligned (operands[0], operands[1]);
2223 seq = gen_reload_inhi_aligned (operands[0], operands[1]);
2228 rtx aligned_mem, bitnum;
2229 rtx scratch = gen_reg_rtx (SImode);
2233 get_aligned_mem (operands[1], &aligned_mem, &bitnum);
2235 subtarget = operands[0];
2236 if (REG_P (subtarget))
2237 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2239 subtarget = gen_reg_rtx (DImode), copyout = true;
2242 seq = gen_aligned_loadqi (subtarget, aligned_mem,
2245 seq = gen_aligned_loadhi (subtarget, aligned_mem,
2250 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2255 /* Don't pass these as parameters since that makes the generated
2256 code depend on parameter evaluation order which will cause
2257 bootstrap failures. */
2259 rtx temp1, temp2, subtarget, ua;
2262 temp1 = gen_reg_rtx (DImode);
2263 temp2 = gen_reg_rtx (DImode);
2265 subtarget = operands[0];
2266 if (REG_P (subtarget))
2267 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2269 subtarget = gen_reg_rtx (DImode), copyout = true;
2271 ua = get_unaligned_address (operands[1]);
2273 seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2);
2275 seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2);
2277 alpha_set_memflags (seq, operands[1]);
2281 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2286 if (any_memory_operand (operands[0], mode))
2288 if (aligned_memory_operand (operands[0], mode))
2290 rtx aligned_mem, bitnum;
2291 rtx temp1 = gen_reg_rtx (SImode);
2292 rtx temp2 = gen_reg_rtx (SImode);
2294 get_aligned_mem (operands[0], &aligned_mem, &bitnum);
2296 emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
2301 rtx temp1 = gen_reg_rtx (DImode);
2302 rtx temp2 = gen_reg_rtx (DImode);
2303 rtx temp3 = gen_reg_rtx (DImode);
2304 rtx ua = get_unaligned_address (operands[0]);
2307 seq = gen_unaligned_storeqi (ua, operands[1], temp1, temp2, temp3);
2309 seq = gen_unaligned_storehi (ua, operands[1], temp1, temp2, temp3);
2311 alpha_set_memflags (seq, operands[0]);
2320 /* Implement the movmisalign patterns. One of the operands is a memory
2321 that is not naturally aligned. Emit instructions to load it. */
2324 alpha_expand_movmisalign (enum machine_mode mode, rtx *operands)
2326 /* Honor misaligned loads for the modes we promised to handle. */
2327 if (MEM_P (operands[1]))
2331 if (register_operand (operands[0], mode))
2334 tmp = gen_reg_rtx (mode);
2336 alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
2337 if (tmp != operands[0])
2338 emit_move_insn (operands[0], tmp);
2340 else if (MEM_P (operands[0]))
2342 if (!reg_or_0_operand (operands[1], mode))
2343 operands[1] = force_reg (mode, operands[1]);
2344 alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
2350 /* Generate an unsigned DImode to FP conversion. This is the same code
2351 optabs would emit if we didn't have TFmode patterns.
2353 For SFmode, this is the only construction I've found that can pass
2354 gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode
2355 intermediates will work, because you'll get intermediate rounding
2356 that ruins the end result. Some of this could be fixed by turning
2357 on round-to-positive-infinity, but that requires diddling the fpsr,
2358 which kills performance. I tried turning this around and converting
2359 to a negative number, so that I could turn on /m, but either I did
2360 it wrong or there's something else going on, because I wound up with the exact
2361 same single-bit error. There is a branch-less form of this same code:
2372 fcmoveq $f10,$f11,$f0
2374 I'm not using it because it's the same number of instructions as
2375 this branch-full form, and it has more serialized long latency
2376 instructions on the critical path.
2378 For DFmode, we can avoid rounding errors by breaking up the word
2379 into two pieces, converting them separately, and adding them back:
2381 LC0: .long 0,0x5f800000
2386 cpyse $f11,$f31,$f10
2387 cpyse $f31,$f11,$f11
2395 This doesn't seem to be a clear-cut win over the optabs form.
2396 It probably all depends on the distribution of numbers being
2397 converted -- in the optabs form, everything except the high-bit-set case has a
2398 much lower minimum execution time. */
2401 alpha_emit_floatuns (rtx operands[2])
2403 rtx neglab, donelab, i0, i1, f0, in, out;
2404 enum machine_mode mode;
2407 in = force_reg (DImode, operands[1]);
2408 mode = GET_MODE (out);
2409 neglab = gen_label_rtx ();
2410 donelab = gen_label_rtx ();
2411 i0 = gen_reg_rtx (DImode);
2412 i1 = gen_reg_rtx (DImode);
2413 f0 = gen_reg_rtx (mode);
2415 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
2417 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
2418 emit_jump_insn (gen_jump (donelab));
2421 emit_label (neglab);
2423 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
2424 emit_insn (gen_anddi3 (i1, in, const1_rtx));
2425 emit_insn (gen_iordi3 (i0, i0, i1));
2426 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
2427 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
2429 emit_label (donelab);
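
/* Editorial sketch (not part of the port): the branchy construction above,
   written as plain C for a DImode source and DFmode result.  When the value
   looks negative as a signed number, halve it while folding the discarded
   bit back into bit 0 (so the final rounding is unaffected), convert, and
   double the result.  The function name is hypothetical.  */

static double
floatuns_sketch (unsigned long long x)
{
  unsigned long long half;

  if ((long long) x >= 0)
    return (double) (long long) x;	/* in range for a signed convert */

  /* Halve, keeping the discarded bit ORed into bit 0, then convert the
     now-positive value and double it.  */
  half = (x >> 1) | (x & 1);
  return (double) (long long) half * 2.0;
}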
2432 /* Generate the comparison for a conditional branch. */
2435 alpha_emit_conditional_branch (rtx operands[], enum machine_mode cmp_mode)
2437 enum rtx_code cmp_code, branch_code;
2438 enum machine_mode branch_mode = VOIDmode;
2439 enum rtx_code code = GET_CODE (operands[0]);
2440 rtx op0 = operands[1], op1 = operands[2];
2443 if (cmp_mode == TFmode)
2445 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2450 /* The general case: fold the comparison code to the types of compares
2451 that we have, choosing the branch as necessary. */
2454 case EQ: case LE: case LT: case LEU: case LTU:
2456 /* We have these compares: */
2457 cmp_code = code, branch_code = NE;
2462 /* These must be reversed. */
2463 cmp_code = reverse_condition (code), branch_code = EQ;
2466 case GE: case GT: case GEU: case GTU:
2467 /* For FP, we swap them, for INT, we reverse them. */
2468 if (cmp_mode == DFmode)
2470 cmp_code = swap_condition (code);
2472 tem = op0, op0 = op1, op1 = tem;
2476 cmp_code = reverse_condition (code);
2485 if (cmp_mode == DFmode)
2487 if (flag_unsafe_math_optimizations && cmp_code != UNORDERED)
2489 /* When we are not as concerned about non-finite values, and we
2490 are comparing against zero, we can branch directly. */
2491 if (op1 == CONST0_RTX (DFmode))
2492 cmp_code = UNKNOWN, branch_code = code;
2493 else if (op0 == CONST0_RTX (DFmode))
2495 /* Undo the swap we probably did just above. */
2496 tem = op0, op0 = op1, op1 = tem;
2497 branch_code = swap_condition (cmp_code);
2503 /* ??? We mark the branch mode to be CCmode to prevent the
2504 compare and branch from being combined, since the compare
2505 insn follows IEEE rules that the branch does not. */
2506 branch_mode = CCmode;
2511 /* The following optimizations are only for signed compares. */
2512 if (code != LEU && code != LTU && code != GEU && code != GTU)
2514 /* Whee. Compare and branch against 0 directly. */
2515 if (op1 == const0_rtx)
2516 cmp_code = UNKNOWN, branch_code = code;
2518 /* If the constant doesn't fit into an immediate, but can
2519 be generated by lda/ldah, we adjust the argument and
2520 compare against zero, so we can use beq/bne directly. */
2521 /* ??? Don't do this when comparing against symbols, otherwise
2522 we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
2523 be declared false out of hand (at least for non-weak). */
2524 else if (CONST_INT_P (op1)
2525 && (code == EQ || code == NE)
2526 && !(symbolic_operand (op0, VOIDmode)
2527 || (REG_P (op0) && REG_POINTER (op0))))
2529 rtx n_op1 = GEN_INT (-INTVAL (op1));
2531 if (! satisfies_constraint_I (op1)
2532 && (satisfies_constraint_K (n_op1)
2533 || satisfies_constraint_L (n_op1)))
2534 cmp_code = PLUS, branch_code = code, op1 = n_op1;
2538 if (!reg_or_0_operand (op0, DImode))
2539 op0 = force_reg (DImode, op0);
2540 if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
2541 op1 = force_reg (DImode, op1);
2544 /* Emit an initial compare instruction, if necessary. */
2546 if (cmp_code != UNKNOWN)
2548 tem = gen_reg_rtx (cmp_mode);
2549 emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
2552 /* Emit the branch instruction. */
2553 tem = gen_rtx_SET (VOIDmode, pc_rtx,
2554 gen_rtx_IF_THEN_ELSE (VOIDmode,
2555 gen_rtx_fmt_ee (branch_code,
2557 CONST0_RTX (cmp_mode)),
2558 gen_rtx_LABEL_REF (VOIDmode,
2561 emit_jump_insn (tem);
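
/* Editorial sketch (not part of the port): the fold performed above for an
   integer "a >= b" branch, in plain C.  Alpha only has cmpeq/cmplt/cmple
   style compares, so the condition is reversed and the branch sense is
   flipped.  The helper name is hypothetical.  */

static int
branch_on_ge_sketch (long a, long b)
{
  long t = (a < b);	/* cmplt a,b,t -- the reversed compare */
  return t == 0;	/* beq t,<label> -- branch when it is false */
}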
2564 /* Certain simplifications can be done to make invalid setcc operations
2565 valid. Return the final comparison, or NULL if we can't work. */
2568 alpha_emit_setcc (rtx operands[], enum machine_mode cmp_mode)
2570 enum rtx_code cmp_code;
2571 enum rtx_code code = GET_CODE (operands[1]);
2572 rtx op0 = operands[2], op1 = operands[3];
2575 if (cmp_mode == TFmode)
2577 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2582 if (cmp_mode == DFmode && !TARGET_FIX)
2585 /* The general case: fold the comparison code to the types of compares
2586 that we have, choosing the branch as necessary. */
2591 case EQ: case LE: case LT: case LEU: case LTU:
2593 /* We have these compares. */
2594 if (cmp_mode == DFmode)
2595 cmp_code = code, code = NE;
2599 if (cmp_mode == DImode && op1 == const0_rtx)
2604 cmp_code = reverse_condition (code);
2608 case GE: case GT: case GEU: case GTU:
2609 /* These normally need swapping, but for integer zero we have
2610 special patterns that recognize swapped operands. */
2611 if (cmp_mode == DImode && op1 == const0_rtx)
2613 code = swap_condition (code);
2614 if (cmp_mode == DFmode)
2615 cmp_code = code, code = NE;
2616 tmp = op0, op0 = op1, op1 = tmp;
2623 if (cmp_mode == DImode)
2625 if (!register_operand (op0, DImode))
2626 op0 = force_reg (DImode, op0);
2627 if (!reg_or_8bit_operand (op1, DImode))
2628 op1 = force_reg (DImode, op1);
2631 /* Emit an initial compare instruction, if necessary. */
2632 if (cmp_code != UNKNOWN)
2634 tmp = gen_reg_rtx (cmp_mode);
2635 emit_insn (gen_rtx_SET (VOIDmode, tmp,
2636 gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1)));
2638 op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp;
2642 /* Emit the setcc instruction. */
2643 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2644 gen_rtx_fmt_ee (code, DImode, op0, op1)));
2649 /* Rewrite a comparison against zero CMP of the form
2650 (CODE (cc0) (const_int 0)) so it can be written validly in
2651 a conditional move (if_then_else CMP ...).
2652 If both of the operands that set cc0 are nonzero we must emit
2653 an insn to perform the compare (it can't be done within
2654 the conditional move). */
2657 alpha_emit_conditional_move (rtx cmp, enum machine_mode mode)
2659 enum rtx_code code = GET_CODE (cmp);
2660 enum rtx_code cmov_code = NE;
2661 rtx op0 = XEXP (cmp, 0);
2662 rtx op1 = XEXP (cmp, 1);
2663 enum machine_mode cmp_mode
2664 = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
2665 enum machine_mode cmov_mode = VOIDmode;
2666 int local_fast_math = flag_unsafe_math_optimizations;
2669 if (cmp_mode == TFmode)
2671 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2676 gcc_assert (cmp_mode == DFmode || cmp_mode == DImode);
2678 if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode))
2680 enum rtx_code cmp_code;
2685 /* If we have fp<->int register move instructions, do a cmov by
2686 performing the comparison in fp registers, and move the
2687 zero/nonzero value to integer registers, where we can then
2688 use a normal cmov, or vice-versa. */
2692 case EQ: case LE: case LT: case LEU: case LTU:
2693 /* We have these compares. */
2694 cmp_code = code, code = NE;
2698 /* This must be reversed. */
2699 cmp_code = EQ, code = EQ;
2702 case GE: case GT: case GEU: case GTU:
2703 /* These normally need swapping, but for integer zero we have
2704 special patterns that recognize swapped operands. */
2705 if (cmp_mode == DImode && op1 == const0_rtx)
2706 cmp_code = code, code = NE;
2709 cmp_code = swap_condition (code);
2711 tem = op0, op0 = op1, op1 = tem;
2719 tem = gen_reg_rtx (cmp_mode);
2720 emit_insn (gen_rtx_SET (VOIDmode, tem,
2721 gen_rtx_fmt_ee (cmp_code, cmp_mode,
2724 cmp_mode = cmp_mode == DImode ? DFmode : DImode;
2725 op0 = gen_lowpart (cmp_mode, tem);
2726 op1 = CONST0_RTX (cmp_mode);
2727 local_fast_math = 1;
2730 /* We may be able to use a conditional move directly.
2731 This avoids emitting spurious compares. */
2732 if (signed_comparison_operator (cmp, VOIDmode)
2733 && (cmp_mode == DImode || local_fast_math)
2734 && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
2735 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2737 /* We can't put the comparison inside the conditional move;
2738 emit a compare instruction and put that inside the
2739 conditional move. Make sure we emit only comparisons we have;
2740 swap or reverse as necessary. */
2742 if (!can_create_pseudo_p ())
2747 case EQ: case LE: case LT: case LEU: case LTU:
2748 /* We have these compares: */
2752 /* This must be reversed. */
2753 code = reverse_condition (code);
2757 case GE: case GT: case GEU: case GTU:
2758 /* These must be swapped. */
2759 if (op1 != CONST0_RTX (cmp_mode))
2761 code = swap_condition (code);
2762 tem = op0, op0 = op1, op1 = tem;
2770 if (cmp_mode == DImode)
2772 if (!reg_or_0_operand (op0, DImode))
2773 op0 = force_reg (DImode, op0);
2774 if (!reg_or_8bit_operand (op1, DImode))
2775 op1 = force_reg (DImode, op1);
2778 /* ??? We mark the branch mode to be CCmode to prevent the compare
2779 and cmov from being combined, since the compare insn follows IEEE
2780 rules that the cmov does not. */
2781 if (cmp_mode == DFmode && !local_fast_math)
2784 tem = gen_reg_rtx (cmp_mode);
2785 emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1));
2786 return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode));
2789 /* Simplify a conditional move of two constants into a setcc with
2790 arithmetic. This is done with a splitter since combine would
2791 just undo the work if done during code generation. It also catches
2792 cases we wouldn't have before cse. */
2795 alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
2796 rtx t_rtx, rtx f_rtx)
2798 HOST_WIDE_INT t, f, diff;
2799 enum machine_mode mode;
2800 rtx target, subtarget, tmp;
2802 mode = GET_MODE (dest);
2807 if (((code == NE || code == EQ) && diff < 0)
2808 || (code == GE || code == GT))
2810 code = reverse_condition (code);
2811 diff = t, t = f, f = diff;
2815 subtarget = target = dest;
2818 target = gen_lowpart (DImode, dest);
2819 if (can_create_pseudo_p ())
2820 subtarget = gen_reg_rtx (DImode);
2824 /* Below, we must be careful to use copy_rtx on target and subtarget
2825 in intermediate insns, as they may be a subreg rtx, which may not
2828 if (f == 0 && exact_log2 (diff) > 0
2829 /* On EV6, we've got enough shifters to make non-arithmetic shifts
2830 viable over a longer latency cmove. On EV5, the E0 slot is a
2831 scarce resource, and on EV4 shift has the same latency as a cmove. */
2832 && (diff <= 8 || alpha_tune == PROCESSOR_EV6))
2834 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2835 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2837 tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2838 GEN_INT (exact_log2 (t)));
2839 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2841 else if (f == 0 && t == -1)
2843 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2844 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2846 emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
2848 else if (diff == 1 || diff == 4 || diff == 8)
2852 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2853 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2856 emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
2859 add_op = GEN_INT (f);
2860 if (sext_add_operand (add_op, mode))
2862 tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget),
2864 tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2865 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
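
/* Editorial sketch (not part of the port): the arithmetic behind the three
   splits above for "dest = cond ? t : f" with constant T and F (after the
   condition has been reversed, if needed, so the interesting constant is T):

     f == 0, t == 2**k :  dest = (cond != 0) << k
     f == 0, t == -1   :  dest = -(cond != 0)
     t - f in {1,4,8}  :  dest = (cond != 0) * (t - f) + f

   The helper below shows the general last form; names are hypothetical.  */

static long
cond_const_select_sketch (long cond, long t, long f)
{
  long set = (cond != 0);	/* cmovne-style setcc */
  return set * (t - f) + f;	/* addq, s4addq or s8addq */
}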
2877 /* Look up the X_floating library function name for the
2880 struct GTY(()) xfloating_op
2882 const enum rtx_code code;
2883 const char *const GTY((skip)) osf_func;
2884 const char *const GTY((skip)) vms_func;
2888 static GTY(()) struct xfloating_op xfloating_ops[] =
2890 { PLUS, "_OtsAddX", "OTS$ADD_X", 0 },
2891 { MINUS, "_OtsSubX", "OTS$SUB_X", 0 },
2892 { MULT, "_OtsMulX", "OTS$MUL_X", 0 },
2893 { DIV, "_OtsDivX", "OTS$DIV_X", 0 },
2894 { EQ, "_OtsEqlX", "OTS$EQL_X", 0 },
2895 { NE, "_OtsNeqX", "OTS$NEQ_X", 0 },
2896 { LT, "_OtsLssX", "OTS$LSS_X", 0 },
2897 { LE, "_OtsLeqX", "OTS$LEQ_X", 0 },
2898 { GT, "_OtsGtrX", "OTS$GTR_X", 0 },
2899 { GE, "_OtsGeqX", "OTS$GEQ_X", 0 },
2900 { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 },
2901 { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 },
2902 { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 },
2903 { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 },
2904 { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 }
2907 static GTY(()) struct xfloating_op vax_cvt_ops[] =
2909 { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 },
2910 { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 }
2914 alpha_lookup_xfloating_lib_func (enum rtx_code code)
2916 struct xfloating_op *ops = xfloating_ops;
2917 long n = ARRAY_SIZE (xfloating_ops);
2920 gcc_assert (TARGET_HAS_XFLOATING_LIBS);
2922 /* How irritating. Nothing to key off for the main table. */
2923 if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE))
2926 n = ARRAY_SIZE (vax_cvt_ops);
2929 for (i = 0; i < n; ++i, ++ops)
2930 if (ops->code == code)
2932 rtx func = ops->libcall;
2935 func = init_one_libfunc (TARGET_ABI_OPEN_VMS
2936 ? ops->vms_func : ops->osf_func);
2937 ops->libcall = func;
2945 /* Most X_floating operations take the rounding mode as an argument.
2946 Compute that here. */
2949 alpha_compute_xfloating_mode_arg (enum rtx_code code,
2950 enum alpha_fp_rounding_mode round)
2956 case ALPHA_FPRM_NORM:
2959 case ALPHA_FPRM_MINF:
2962 case ALPHA_FPRM_CHOP:
2965 case ALPHA_FPRM_DYN:
2971 /* XXX For reference, round to +inf is mode = 3. */
2974 if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
2980 /* Emit an X_floating library function call.
2982 Note that these functions do not follow normal calling conventions:
2983 TFmode arguments are passed in two integer registers (as opposed to
2984 indirect); TFmode return values appear in R16+R17.
2986 FUNC is the function to call.
2987 TARGET is where the output belongs.
2988 OPERANDS are the inputs.
2989 NOPERANDS is the count of inputs.
2990 EQUIV is the expression equivalent for the function.
2994 alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
2995 int noperands, rtx equiv)
2997 rtx usage = NULL_RTX, tmp, reg;
3002 for (i = 0; i < noperands; ++i)
3004 switch (GET_MODE (operands[i]))
3007 reg = gen_rtx_REG (TFmode, regno);
3012 reg = gen_rtx_REG (DFmode, regno + 32);
3017 gcc_assert (CONST_INT_P (operands[i]));
3020 reg = gen_rtx_REG (DImode, regno);
3028 emit_move_insn (reg, operands[i]);
3029 usage = alloc_EXPR_LIST (0, gen_rtx_USE (VOIDmode, reg), usage);
3032 switch (GET_MODE (target))
3035 reg = gen_rtx_REG (TFmode, 16);
3038 reg = gen_rtx_REG (DFmode, 32);
3041 reg = gen_rtx_REG (DImode, 0);
3047 tmp = gen_rtx_MEM (QImode, func);
3048 tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx,
3049 const0_rtx, const0_rtx));
3050 CALL_INSN_FUNCTION_USAGE (tmp) = usage;
3051 RTL_CONST_CALL_P (tmp) = 1;
3056 emit_libcall_block (tmp, target, reg, equiv);
3059 /* Emit an X_floating library function call for arithmetic (+,-,*,/). */
3062 alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[])
3066 rtx out_operands[3];
3068 func = alpha_lookup_xfloating_lib_func (code);
3069 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3071 out_operands[0] = operands[1];
3072 out_operands[1] = operands[2];
3073 out_operands[2] = GEN_INT (mode);
3074 alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
3075 gen_rtx_fmt_ee (code, TFmode, operands[1],
3079 /* Emit an X_floating library function call for a comparison. */
3082 alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
3084 enum rtx_code cmp_code, res_code;
3085 rtx func, out, operands[2], note;
3087 /* X_floating library comparison functions return
3091 Convert the compare against the raw return value. */
3119 func = alpha_lookup_xfloating_lib_func (cmp_code);
3123 out = gen_reg_rtx (DImode);
3125 /* What's actually returned is -1,0,1, not a proper boolean value,
3126 so use an EXPR_LIST as with a generic libcall instead of a
3127 comparison type expression. */
3128 note = gen_rtx_EXPR_LIST (VOIDmode, op1, NULL_RTX);
3129 note = gen_rtx_EXPR_LIST (VOIDmode, op0, note);
3130 note = gen_rtx_EXPR_LIST (VOIDmode, func, note);
3131 alpha_emit_xfloating_libcall (func, out, operands, 2, note);
3136 /* Emit an X_floating library function call for a conversion. */
3139 alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[])
3141 int noperands = 1, mode;
3142 rtx out_operands[2];
3144 enum rtx_code code = orig_code;
3146 if (code == UNSIGNED_FIX)
3149 func = alpha_lookup_xfloating_lib_func (code);
3151 out_operands[0] = operands[1];
3156 mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
3157 out_operands[1] = GEN_INT (mode);
3160 case FLOAT_TRUNCATE:
3161 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3162 out_operands[1] = GEN_INT (mode);
3169 alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
3170 gen_rtx_fmt_e (orig_code,
3171 GET_MODE (operands[0]),
3175 /* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of
3176 DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true,
3177 guarantee that the sequence
3180 is valid. Naturally, output operand ordering is little-endian.
3181 This is used by *movtf_internal and *movti_internal. */
3184 alpha_split_tmode_pair (rtx operands[4], enum machine_mode mode,
3187 switch (GET_CODE (operands[1]))
3190 operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
3191 operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
3195 operands[3] = adjust_address (operands[1], DImode, 8);
3196 operands[2] = adjust_address (operands[1], DImode, 0);
3201 gcc_assert (operands[1] == CONST0_RTX (mode));
3202 operands[2] = operands[3] = const0_rtx;
3209 switch (GET_CODE (operands[0]))
3212 operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
3213 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
3217 operands[1] = adjust_address (operands[0], DImode, 8);
3218 operands[0] = adjust_address (operands[0], DImode, 0);
3225 if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3]))
3228 tmp = operands[0], operands[0] = operands[1], operands[1] = tmp;
3229 tmp = operands[2], operands[2] = operands[3], operands[3] = tmp;
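
/* Editorial sketch (not part of the port): the overlap fixup above, acted
   out on an array standing in for the register file.  If the first
   destination word is the same register as the second source word, copying
   in the usual order would clobber the source, so both pairs are swapped
   and the high half is moved first.  Names are hypothetical.  */

static void
move_pair_sketch (unsigned long long regs[], int d0, int d1, int s0, int s1)
{
  if (d0 == s1)
    {
      regs[d1] = regs[s1];
      regs[d0] = regs[s0];
    }
  else
    {
      regs[d0] = regs[s0];
      regs[d1] = regs[s1];
    }
}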
3233 /* Implement negtf2 or abstf2. Op0 is destination, op1 is source,
3234 op2 is a register containing the sign bit, operation is the
3235 logical operation to be performed. */
3238 alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx))
3240 rtx high_bit = operands[2];
3244 alpha_split_tmode_pair (operands, TFmode, false);
3246 /* Detect three flavors of operand overlap. */
3248 if (rtx_equal_p (operands[0], operands[2]))
3250 else if (rtx_equal_p (operands[1], operands[2]))
3252 if (rtx_equal_p (operands[0], high_bit))
3259 emit_move_insn (operands[0], operands[2]);
3261 /* ??? If the destination overlaps both source tf and high_bit, then
3262 assume source tf is dead in its entirety and use the other half
3263 for a scratch register. Otherwise "scratch" is just the proper
3264 destination register. */
3265 scratch = operands[move < 2 ? 1 : 3];
3267 emit_insn ((*operation) (scratch, high_bit, operands[3]));
3271 emit_move_insn (operands[0], operands[2]);
3273 emit_move_insn (operands[1], scratch);
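
/* Editorial sketch (not part of the port): all negtf2/abstf2 have to do is
   apply a logical operation to the sign bit, which lives in bit 63 of the
   high DImode half (the second word in little-endian storage); the low half
   is copied through unchanged.  The helper name is hypothetical.  */

static void
negate_tf_sketch (unsigned long long v[2])
{
  v[1] ^= 1ULL << 63;	/* flip the sign; abstf2 would AND it away instead */
}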
3277 /* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
3281 word: ldq_u r1,X(r11) ldq_u r1,X(r11)
3282 ldq_u r2,X+1(r11) ldq_u r2,X+1(r11)
3283 lda r3,X(r11) lda r3,X+2(r11)
3284 extwl r1,r3,r1 extql r1,r3,r1
3285 extwh r2,r3,r2 extqh r2,r3,r2
3286 or r1,r2,r1 or r1,r2,r1
3289 long: ldq_u r1,X(r11) ldq_u r1,X(r11)
3290 ldq_u r2,X+3(r11) ldq_u r2,X+3(r11)
3291 lda r3,X(r11) lda r3,X(r11)
3292 extll r1,r3,r1 extll r1,r3,r1
3293 extlh r2,r3,r2 extlh r2,r3,r2
3294 or r1,r2,r1 addl r1,r2,r1
3296 quad: ldq_u r1,X(r11)
3305 alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
3306 HOST_WIDE_INT ofs, int sign)
3308 rtx meml, memh, addr, extl, exth, tmp, mema;
3309 enum machine_mode mode;
3311 if (TARGET_BWX && size == 2)
3313 meml = adjust_address (mem, QImode, ofs);
3314 memh = adjust_address (mem, QImode, ofs+1);
3315 if (BYTES_BIG_ENDIAN)
3316 tmp = meml, meml = memh, memh = tmp;
3317 extl = gen_reg_rtx (DImode);
3318 exth = gen_reg_rtx (DImode);
3319 emit_insn (gen_zero_extendqidi2 (extl, meml));
3320 emit_insn (gen_zero_extendqidi2 (exth, memh));
3321 exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
3322 NULL, 1, OPTAB_LIB_WIDEN);
3323 addr = expand_simple_binop (DImode, IOR, extl, exth,
3324 NULL, 1, OPTAB_LIB_WIDEN);
3326 if (sign && GET_MODE (tgt) != HImode)
3328 addr = gen_lowpart (HImode, addr);
3329 emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
3333 if (GET_MODE (tgt) != DImode)
3334 addr = gen_lowpart (GET_MODE (tgt), addr);
3335 emit_move_insn (tgt, addr);
3340 meml = gen_reg_rtx (DImode);
3341 memh = gen_reg_rtx (DImode);
3342 addr = gen_reg_rtx (DImode);
3343 extl = gen_reg_rtx (DImode);
3344 exth = gen_reg_rtx (DImode);
3346 mema = XEXP (mem, 0);
3347 if (GET_CODE (mema) == LO_SUM)
3348 mema = force_reg (Pmode, mema);
3350 /* AND addresses cannot be in any alias set, since they may implicitly
3351 alias surrounding code. Ideally we'd have some alias set that
3352 covered all types except those with alignment 8 or higher. */
3354 tmp = change_address (mem, DImode,
3355 gen_rtx_AND (DImode,
3356 plus_constant (mema, ofs),
3358 set_mem_alias_set (tmp, 0);
3359 emit_move_insn (meml, tmp);
3361 tmp = change_address (mem, DImode,
3362 gen_rtx_AND (DImode,
3363 plus_constant (mema, ofs + size - 1),
3365 set_mem_alias_set (tmp, 0);
3366 emit_move_insn (memh, tmp);
3368 if (WORDS_BIG_ENDIAN && sign && (size == 2 || size == 4))
3370 emit_move_insn (addr, plus_constant (mema, -1));
3372 emit_insn (gen_extqh_be (extl, meml, addr));
3373 emit_insn (gen_extxl_be (exth, memh, GEN_INT (64), addr));
3375 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3376 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (64 - size*8),
3377 addr, 1, OPTAB_WIDEN);
3379 else if (sign && size == 2)
3381 emit_move_insn (addr, plus_constant (mema, ofs+2));
3383 emit_insn (gen_extxl_le (extl, meml, GEN_INT (64), addr));
3384 emit_insn (gen_extqh_le (exth, memh, addr));
3386 /* We must use tgt here for the target. The Alpha-vms port fails if we use
3387 addr for the target, because addr is marked as a pointer and combine
3388 knows that pointers are always sign-extended 32-bit values. */
3389 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3390 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
3391 addr, 1, OPTAB_WIDEN);
3395 if (WORDS_BIG_ENDIAN)
3397 emit_move_insn (addr, plus_constant (mema, ofs+size-1));
3401 emit_insn (gen_extwh_be (extl, meml, addr));
3406 emit_insn (gen_extlh_be (extl, meml, addr));
3411 emit_insn (gen_extqh_be (extl, meml, addr));
3418 emit_insn (gen_extxl_be (exth, memh, GEN_INT (size*8), addr));
3422 emit_move_insn (addr, plus_constant (mema, ofs));
3423 emit_insn (gen_extxl_le (extl, meml, GEN_INT (size*8), addr));
3427 emit_insn (gen_extwh_le (exth, memh, addr));
3432 emit_insn (gen_extlh_le (exth, memh, addr));
3437 emit_insn (gen_extqh_le (exth, memh, addr));
3446 addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
3447 gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
3452 emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr));
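
/* Editorial sketch (not part of the port): the extll/extlh combination used
   above for an unaligned 4-byte load, as plain C.  MEM[0] and MEM[1] are the
   aligned quadwords covering the first and last byte of the datum (they may
   be the same quadword) and OFS is the byte address.  The zero-shift case is
   handled with an explicit test here; in the generated code it is either
   harmless (both quadwords are the same) or cleaned up with a cmov in the
   word-copy routines.  Names are hypothetical; little-endian layout is
   assumed.  */

static unsigned int
unaligned_load_long_sketch (const unsigned long long mem[2],
			    unsigned long long ofs)
{
  unsigned int shift = (unsigned int) (ofs & 7) * 8;
  unsigned long long lo = mem[0] >> shift;			/* extll */
  unsigned long long hi = shift ? mem[1] << (64 - shift) : 0;	/* extlh */
  return (unsigned int) (lo | hi);				/* or/addl */
}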
3455 /* Similarly, use ins and msk instructions to perform unaligned stores. */
3458 alpha_expand_unaligned_store (rtx dst, rtx src,
3459 HOST_WIDE_INT size, HOST_WIDE_INT ofs)
3461 rtx dstl, dsth, addr, insl, insh, meml, memh, dsta;
3463 if (TARGET_BWX && size == 2)
3465 if (src != const0_rtx)
3467 dstl = gen_lowpart (QImode, src);
3468 dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8),
3469 NULL, 1, OPTAB_LIB_WIDEN);
3470 dsth = gen_lowpart (QImode, dsth);
3473 dstl = dsth = const0_rtx;
3475 meml = adjust_address (dst, QImode, ofs);
3476 memh = adjust_address (dst, QImode, ofs+1);
3477 if (BYTES_BIG_ENDIAN)
3478 addr = meml, meml = memh, memh = addr;
3480 emit_move_insn (meml, dstl);
3481 emit_move_insn (memh, dsth);
3485 dstl = gen_reg_rtx (DImode);
3486 dsth = gen_reg_rtx (DImode);
3487 insl = gen_reg_rtx (DImode);
3488 insh = gen_reg_rtx (DImode);
3490 dsta = XEXP (dst, 0);
3491 if (GET_CODE (dsta) == LO_SUM)
3492 dsta = force_reg (Pmode, dsta);
3494 /* AND addresses cannot be in any alias set, since they may implicitly
3495 alias surrounding code. Ideally we'd have some alias set that
3496 covered all types except those with alignment 8 or higher. */
3498 meml = change_address (dst, DImode,
3499 gen_rtx_AND (DImode,
3500 plus_constant (dsta, ofs),
3502 set_mem_alias_set (meml, 0);
3504 memh = change_address (dst, DImode,
3505 gen_rtx_AND (DImode,
3506 plus_constant (dsta, ofs + size - 1),
3508 set_mem_alias_set (memh, 0);
3510 emit_move_insn (dsth, memh);
3511 emit_move_insn (dstl, meml);
3512 if (WORDS_BIG_ENDIAN)
3514 addr = copy_addr_to_reg (plus_constant (dsta, ofs+size-1));
3516 if (src != const0_rtx)
3521 emit_insn (gen_inswl_be (insh, gen_lowpart (HImode,src), addr));
3524 emit_insn (gen_insll_be (insh, gen_lowpart (SImode,src), addr));
3527 emit_insn (gen_insql_be (insh, gen_lowpart (DImode,src), addr));
3530 emit_insn (gen_insxh (insl, gen_lowpart (DImode, src),
3531 GEN_INT (size*8), addr));
3537 emit_insn (gen_mskxl_be (dsth, dsth, GEN_INT (0xffff), addr));
3541 rtx msk = immed_double_const (0xffffffff, 0, DImode);
3542 emit_insn (gen_mskxl_be (dsth, dsth, msk, addr));
3546 emit_insn (gen_mskxl_be (dsth, dsth, constm1_rtx, addr));
3550 emit_insn (gen_mskxh (dstl, dstl, GEN_INT (size*8), addr));
3554 addr = copy_addr_to_reg (plus_constant (dsta, ofs));
3556 if (src != CONST0_RTX (GET_MODE (src)))
3558 emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
3559 GEN_INT (size*8), addr));
3564 emit_insn (gen_inswl_le (insl, gen_lowpart (HImode, src), addr));
3567 emit_insn (gen_insll_le (insl, gen_lowpart (SImode, src), addr));
3570 emit_insn (gen_insql_le (insl, gen_lowpart (DImode, src), addr));
3575 emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
3580 emit_insn (gen_mskxl_le (dstl, dstl, GEN_INT (0xffff), addr));
3584 rtx msk = immed_double_const (0xffffffff, 0, DImode);
3585 emit_insn (gen_mskxl_le (dstl, dstl, msk, addr));
3589 emit_insn (gen_mskxl_le (dstl, dstl, constm1_rtx, addr));
3594 if (src != CONST0_RTX (GET_MODE (src)))
3596 dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
3597 dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
3600 if (WORDS_BIG_ENDIAN)
3602 emit_move_insn (meml, dstl);
3603 emit_move_insn (memh, dsth);
3607 /* Must store high before low for degenerate case of aligned. */
3608 emit_move_insn (memh, dsth);
3609 emit_move_insn (meml, dstl);
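
/* Editorial sketch (not part of the port): the ins/msk read-modify-write
   used above, shown for an 8-byte store in plain C.  MEM[0] and MEM[1] are
   the covering aligned quadwords and OFS the byte address; little-endian
   layout and hypothetical names are assumed.  The real sequence stores the
   high quadword before the low one so that the fully aligned case ends with
   the correct value in memory.  */

static void
unaligned_store_quad_sketch (unsigned long long mem[2],
			     unsigned long long ofs,
			     unsigned long long val)
{
  unsigned int shift = (unsigned int) (ofs & 7) * 8;

  /* Low quadword: keep the bytes below OFS, insert VAL above them.  */
  mem[0] = (shift ? mem[0] & ((1ULL << shift) - 1) : 0) | (val << shift);

  /* High quadword: only touched when the store crosses an 8-byte boundary.  */
  if (shift)
    mem[1] = (mem[1] & ~((1ULL << shift) - 1)) | (val >> (64 - shift));
}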
3613 /* The block move code tries to maximize speed by separating loads and
3614 stores at the expense of register pressure: we load all of the data
3615 before we store it back out. There are two secondary effects worth
3616 mentioning: this speeds copying to/from aligned and unaligned
3617 buffers, and it makes the code significantly easier to write. */
3619 #define MAX_MOVE_WORDS 8
3621 /* Load an integral number of consecutive unaligned quadwords. */
3624 alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem,
3625 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3627 rtx const im8 = GEN_INT (-8);
3628 rtx const i64 = GEN_INT (64);
3629 rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
3630 rtx sreg, areg, tmp, smema;
3633 smema = XEXP (smem, 0);
3634 if (GET_CODE (smema) == LO_SUM)
3635 smema = force_reg (Pmode, smema);
3637 /* Generate all the tmp registers we need. */
3638 for (i = 0; i < words; ++i)
3640 data_regs[i] = out_regs[i];
3641 ext_tmps[i] = gen_reg_rtx (DImode);
3643 data_regs[words] = gen_reg_rtx (DImode);
3646 smem = adjust_address (smem, GET_MODE (smem), ofs);
3648 /* Load up all of the source data. */
3649 for (i = 0; i < words; ++i)
3651 tmp = change_address (smem, DImode,
3652 gen_rtx_AND (DImode,
3653 plus_constant (smema, 8*i),
3655 set_mem_alias_set (tmp, 0);
3656 emit_move_insn (data_regs[i], tmp);
3659 tmp = change_address (smem, DImode,
3660 gen_rtx_AND (DImode,
3661 plus_constant (smema, 8*words - 1),
3663 set_mem_alias_set (tmp, 0);
3664 emit_move_insn (data_regs[words], tmp);
3666 /* Extract the half-word fragments. Unfortunately DEC decided to make
3667 extxh with offset zero a noop instead of zeroing the register, so
3668 we must take care of that edge condition ourselves with cmov. */
3670 sreg = copy_addr_to_reg (smema);
3671 areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL,
3673 if (WORDS_BIG_ENDIAN)
3674 emit_move_insn (sreg, plus_constant (sreg, 7));
3675 for (i = 0; i < words; ++i)
3677 if (WORDS_BIG_ENDIAN)
3679 emit_insn (gen_extqh_be (data_regs[i], data_regs[i], sreg));
3680 emit_insn (gen_extxl_be (ext_tmps[i], data_regs[i+1], i64, sreg));
3684 emit_insn (gen_extxl_le (data_regs[i], data_regs[i], i64, sreg));
3685 emit_insn (gen_extqh_le (ext_tmps[i], data_regs[i+1], sreg));
3687 emit_insn (gen_rtx_SET (VOIDmode, ext_tmps[i],
3688 gen_rtx_IF_THEN_ELSE (DImode,
3689 gen_rtx_EQ (DImode, areg,
3691 const0_rtx, ext_tmps[i])));
3694 /* Merge the half-words into whole words. */
3695 for (i = 0; i < words; ++i)
3697 out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i],
3698 ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN);
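
/* Editorial sketch (not part of the port): one iteration of the merge loop
   above, in plain C.  LO_QUAD and HI_QUAD are consecutive source quadwords
   and ADDR the (unaligned) source address.  The explicit test on SHIFT plays
   the role of the cmov noted above: when the source happens to be aligned,
   ext*h is a no-op rather than producing zero, so its contribution must be
   forced to zero before the OR.  Names are hypothetical.  */

static unsigned long long
merge_unaligned_word_sketch (unsigned long long lo_quad,
			     unsigned long long hi_quad,
			     unsigned long long addr)
{
  unsigned int shift = (unsigned int) (addr & 7) * 8;
  unsigned long long lo = lo_quad >> shift;			/* extql */
  unsigned long long hi = shift ? hi_quad << (64 - shift) : 0;	/* extqh + cmoveq */
  return lo | hi;
}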
3702 /* Store an integral number of consecutive unaligned quadwords. DATA_REGS
3703 may be NULL to store zeros. */
3706 alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem,
3707 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3709 rtx const im8 = GEN_INT (-8);
3710 rtx const i64 = GEN_INT (64);
3711 rtx ins_tmps[MAX_MOVE_WORDS];
3712 rtx st_tmp_1, st_tmp_2, dreg;
3713 rtx st_addr_1, st_addr_2, dmema;
3716 dmema = XEXP (dmem, 0);
3717 if (GET_CODE (dmema) == LO_SUM)
3718 dmema = force_reg (Pmode, dmema);
3720 /* Generate all the tmp registers we need. */
3721 if (data_regs != NULL)
3722 for (i = 0; i < words; ++i)
3723 ins_tmps[i] = gen_reg_rtx(DImode);
3724 st_tmp_1 = gen_reg_rtx(DImode);
3725 st_tmp_2 = gen_reg_rtx(DImode);
3728 dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
3730 st_addr_2 = change_address (dmem, DImode,
3731 gen_rtx_AND (DImode,
3732 plus_constant (dmema, words*8 - 1),
3734 set_mem_alias_set (st_addr_2, 0);
3736 st_addr_1 = change_address (dmem, DImode,
3737 gen_rtx_AND (DImode, dmema, im8));
3738 set_mem_alias_set (st_addr_1, 0);
3740 /* Load up the destination end bits. */
3741 emit_move_insn (st_tmp_2, st_addr_2);
3742 emit_move_insn (st_tmp_1, st_addr_1);
3744 /* Shift the input data into place. */
3745 dreg = copy_addr_to_reg (dmema);
3746 if (WORDS_BIG_ENDIAN)
3747 emit_move_insn (dreg, plus_constant (dreg, 7));
3748 if (data_regs != NULL)
3750 for (i = words-1; i >= 0; --i)
3752 if (WORDS_BIG_ENDIAN)
3754 emit_insn (gen_insql_be (ins_tmps[i], data_regs[i], dreg));
3755 emit_insn (gen_insxh (data_regs[i], data_regs[i], i64, dreg));
3759 emit_insn (gen_insxh (ins_tmps[i], data_regs[i], i64, dreg));
3760 emit_insn (gen_insql_le (data_regs[i], data_regs[i], dreg));
3763 for (i = words-1; i > 0; --i)
3765 ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i],
3766 ins_tmps[i-1], ins_tmps[i-1], 1,
3771 /* Split and merge the ends with the destination data. */
3772 if (WORDS_BIG_ENDIAN)
3774 emit_insn (gen_mskxl_be (st_tmp_2, st_tmp_2, constm1_rtx, dreg));
3775 emit_insn (gen_mskxh (st_tmp_1, st_tmp_1, i64, dreg));
3779 emit_insn (gen_mskxh (st_tmp_2, st_tmp_2, i64, dreg));
3780 emit_insn (gen_mskxl_le (st_tmp_1, st_tmp_1, constm1_rtx, dreg));
3783 if (data_regs != NULL)
3785 st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1],
3786 st_tmp_2, 1, OPTAB_WIDEN);
3787 st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0],
3788 st_tmp_1, 1, OPTAB_WIDEN);
3792 if (WORDS_BIG_ENDIAN)
3793 emit_move_insn (st_addr_1, st_tmp_1);
3795 emit_move_insn (st_addr_2, st_tmp_2);
3796 for (i = words-1; i > 0; --i)
3798 rtx tmp = change_address (dmem, DImode,
3799 gen_rtx_AND (DImode,
3800 plus_constant(dmema,
3801 WORDS_BIG_ENDIAN ? i*8-1 : i*8),
3803 set_mem_alias_set (tmp, 0);
3804 emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx);
3806 if (WORDS_BIG_ENDIAN)
3807 emit_move_insn (st_addr_2, st_tmp_2);
3809 emit_move_insn (st_addr_1, st_tmp_1);
3813 /* Expand string/block move operations.
3815 operands[0] is the pointer to the destination.
3816 operands[1] is the pointer to the source.
3817 operands[2] is the number of bytes to move.
3818 operands[3] is the alignment. */
3821 alpha_expand_block_move (rtx operands[])
3823 rtx bytes_rtx = operands[2];
3824 rtx align_rtx = operands[3];
3825 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3826 HOST_WIDE_INT bytes = orig_bytes;
3827 HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
3828 HOST_WIDE_INT dst_align = src_align;
3829 rtx orig_src = operands[1];
3830 rtx orig_dst = operands[0];
3831 rtx data_regs[2 * MAX_MOVE_WORDS + 16];
3833 unsigned int i, words, ofs, nregs = 0;
3835 if (orig_bytes <= 0)
3837 else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3840 /* Look for additional alignment information from recorded register info. */
3842 tmp = XEXP (orig_src, 0);
3844 src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3845 else if (GET_CODE (tmp) == PLUS
3846 && REG_P (XEXP (tmp, 0))
3847 && CONST_INT_P (XEXP (tmp, 1)))
3849 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3850 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3854 if (a >= 64 && c % 8 == 0)
3856 else if (a >= 32 && c % 4 == 0)
3858 else if (a >= 16 && c % 2 == 0)
3863 tmp = XEXP (orig_dst, 0);
3865 dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3866 else if (GET_CODE (tmp) == PLUS
3867 && REG_P (XEXP (tmp, 0))
3868 && CONST_INT_P (XEXP (tmp, 1)))
3870 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3871 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3875 if (a >= 64 && c % 8 == 0)
3877 else if (a >= 32 && c % 4 == 0)
3879 else if (a >= 16 && c % 2 == 0)
3885 if (src_align >= 64 && bytes >= 8)
3889 for (i = 0; i < words; ++i)
3890 data_regs[nregs + i] = gen_reg_rtx (DImode);
3892 for (i = 0; i < words; ++i)
3893 emit_move_insn (data_regs[nregs + i],
3894 adjust_address (orig_src, DImode, ofs + i * 8));
3901 if (src_align >= 32 && bytes >= 4)
3905 for (i = 0; i < words; ++i)
3906 data_regs[nregs + i] = gen_reg_rtx (SImode);
3908 for (i = 0; i < words; ++i)
3909 emit_move_insn (data_regs[nregs + i],
3910 adjust_address (orig_src, SImode, ofs + i * 4));
3921 for (i = 0; i < words+1; ++i)
3922 data_regs[nregs + i] = gen_reg_rtx (DImode);
3924 alpha_expand_unaligned_load_words (data_regs + nregs, orig_src,
3932 if (! TARGET_BWX && bytes >= 4)
3934 data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
3935 alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0);
3942 if (src_align >= 16)
3945 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3946 emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs));
3949 } while (bytes >= 2);
3951 else if (! TARGET_BWX)
3953 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3954 alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0);
3962 data_regs[nregs++] = tmp = gen_reg_rtx (QImode);
3963 emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs));
3968 gcc_assert (nregs <= ARRAY_SIZE (data_regs));
3970 /* Now save it back out again. */
3974 /* Write out the data in whatever chunks reading the source allowed. */
3975 if (dst_align >= 64)
3977 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3979 emit_move_insn (adjust_address (orig_dst, DImode, ofs),
3986 if (dst_align >= 32)
3988 /* If the source has remaining DImode regs, write them out in
3990 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3992 tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32),
3993 NULL_RTX, 1, OPTAB_WIDEN);
3995 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3996 gen_lowpart (SImode, data_regs[i]));
3997 emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4),
3998 gen_lowpart (SImode, tmp));
4003 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
4005 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
4012 if (i < nregs && GET_MODE (data_regs[i]) == DImode)
4014 /* Write out a remaining block of words using unaligned methods. */
4016 for (words = 1; i + words < nregs; words++)
4017 if (GET_MODE (data_regs[i + words]) != DImode)
4021 alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
4023 alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
4030 /* Due to the above, this won't be aligned. */
4031 /* ??? If we have more than one of these, consider constructing full
4032 words in registers and using alpha_expand_unaligned_store_words. */
4033 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
4035 alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
4040 if (dst_align >= 16)
4041 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
4043 emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]);