1 /* Subroutines used for code generation on the DEC Alpha.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
26 #include "coretypes.h"
31 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
44 #include "diagnostic-core.h"
46 #include "integrate.h"
49 #include "target-def.h"
50 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "splay-tree.h"
54 #include "cfglayout.h"
56 #include "tree-flow.h"
57 #include "tree-stdarg.h"
58 #include "tm-constrs.h"
63 /* Specify which cpu to schedule for. */
64 enum processor_type alpha_tune;
66 /* Which cpu we're generating code for. */
67 enum processor_type alpha_cpu;
69 static const char * const alpha_cpu_name[] =
74 /* Specify how accurate floating-point traps need to be. */
76 enum alpha_trap_precision alpha_tp;
78 /* Specify the floating-point rounding mode. */
80 enum alpha_fp_rounding_mode alpha_fprm;
82 /* Specify which things cause traps. */
84 enum alpha_fp_trap_mode alpha_fptm;
86 /* Nonzero if inside of a function, because the Alpha asm can't
87 handle .files inside of functions. */
89 static int inside_function = FALSE;
91 /* The number of cycles of latency we should assume on memory reads. */
93 int alpha_memory_latency = 3;
95 /* Whether the function needs the GP. */
97 static int alpha_function_needs_gp;
99 /* The assembler name of the current function. */
101 static const char *alpha_fnname;
103 /* The next explicit relocation sequence number. */
104 extern GTY(()) int alpha_next_sequence_number;
105 int alpha_next_sequence_number = 1;
107 /* The literal and gpdisp sequence numbers for this insn, as printed
108 by %# and %* respectively. */
109 extern GTY(()) int alpha_this_literal_sequence_number;
110 extern GTY(()) int alpha_this_gpdisp_sequence_number;
111 int alpha_this_literal_sequence_number;
112 int alpha_this_gpdisp_sequence_number;
114 /* Costs of various operations on the different architectures. */
116 struct alpha_rtx_cost_data
118 unsigned char fp_add;
119 unsigned char fp_mult;
120 unsigned char fp_div_sf;
121 unsigned char fp_div_df;
122 unsigned char int_mult_si;
123 unsigned char int_mult_di;
124 unsigned char int_shift;
125 unsigned char int_cmov;
126 unsigned short int_div;
129 static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
132 COSTS_N_INSNS (6), /* fp_add */
133 COSTS_N_INSNS (6), /* fp_mult */
134 COSTS_N_INSNS (34), /* fp_div_sf */
135 COSTS_N_INSNS (63), /* fp_div_df */
136 COSTS_N_INSNS (23), /* int_mult_si */
137 COSTS_N_INSNS (23), /* int_mult_di */
138 COSTS_N_INSNS (2), /* int_shift */
139 COSTS_N_INSNS (2), /* int_cmov */
140 COSTS_N_INSNS (97), /* int_div */
143 COSTS_N_INSNS (4), /* fp_add */
144 COSTS_N_INSNS (4), /* fp_mult */
145 COSTS_N_INSNS (15), /* fp_div_sf */
146 COSTS_N_INSNS (22), /* fp_div_df */
147 COSTS_N_INSNS (8), /* int_mult_si */
148 COSTS_N_INSNS (12), /* int_mult_di */
149 COSTS_N_INSNS (1) + 1, /* int_shift */
150 COSTS_N_INSNS (1), /* int_cmov */
151 COSTS_N_INSNS (83), /* int_div */
154 COSTS_N_INSNS (4), /* fp_add */
155 COSTS_N_INSNS (4), /* fp_mult */
156 COSTS_N_INSNS (12), /* fp_div_sf */
157 COSTS_N_INSNS (15), /* fp_div_df */
158 COSTS_N_INSNS (7), /* int_mult_si */
159 COSTS_N_INSNS (7), /* int_mult_di */
160 COSTS_N_INSNS (1), /* int_shift */
161 COSTS_N_INSNS (2), /* int_cmov */
162 COSTS_N_INSNS (86), /* int_div */
166 /* Similar but tuned for code size instead of execution latency. The
167 extra +N is fractional cost tuning based on latency. It's used to
168 encourage use of cheaper insns like shift, but only if there's just
169 one of them.  */
171 static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
173 COSTS_N_INSNS (1), /* fp_add */
174 COSTS_N_INSNS (1), /* fp_mult */
175 COSTS_N_INSNS (1), /* fp_div_sf */
176 COSTS_N_INSNS (1) + 1, /* fp_div_df */
177 COSTS_N_INSNS (1) + 1, /* int_mult_si */
178 COSTS_N_INSNS (1) + 2, /* int_mult_di */
179 COSTS_N_INSNS (1), /* int_shift */
180 COSTS_N_INSNS (1), /* int_cmov */
181 COSTS_N_INSNS (6), /* int_div */
184 /* Get the number of args of a function in one of two ways. */
185 #if TARGET_ABI_OPEN_VMS
186 #define NUM_ARGS crtl->args.info.num_args
188 #define NUM_ARGS crtl->args.info
194 /* Declarations of static functions. */
195 static struct machine_function *alpha_init_machine_status (void);
196 static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
198 #if TARGET_ABI_OPEN_VMS
199 static void alpha_write_linkage (FILE *, const char *);
200 static bool vms_valid_pointer_mode (enum machine_mode);
202 #define vms_patch_builtins() gcc_unreachable()
205 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
206 /* Implement TARGET_MANGLE_TYPE. */
209 alpha_mangle_type (const_tree type)
211 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
212 && TARGET_LONG_DOUBLE_128)
215 /* For all other types, use normal C++ mangling. */
220 /* Parse target option strings. */
223 alpha_option_override (void)
225 static const struct cpu_table {
226 const char *const name;
227 const enum processor_type processor;
230 { "ev4", PROCESSOR_EV4, 0 },
231 { "ev45", PROCESSOR_EV4, 0 },
232 { "21064", PROCESSOR_EV4, 0 },
233 { "ev5", PROCESSOR_EV5, 0 },
234 { "21164", PROCESSOR_EV5, 0 },
235 { "ev56", PROCESSOR_EV5, MASK_BWX },
236 { "21164a", PROCESSOR_EV5, MASK_BWX },
237 { "pca56", PROCESSOR_EV5, MASK_BWX|MASK_MAX },
238 { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX },
239 { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX },
240 { "ev6", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX },
241 { "21264", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX },
242 { "ev67", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX },
243 { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX }
246 int const ct_size = ARRAY_SIZE (cpu_table);
249 #ifdef SUBTARGET_OVERRIDE_OPTIONS
250 SUBTARGET_OVERRIDE_OPTIONS;
253 /* Default to full IEEE compliance mode for Go language. */
254 if (strcmp (lang_hooks.name, "GNU Go") == 0
255 && !(target_flags_explicit & MASK_IEEE))
256 target_flags |= MASK_IEEE;
258 alpha_fprm = ALPHA_FPRM_NORM;
259 alpha_tp = ALPHA_TP_PROG;
260 alpha_fptm = ALPHA_FPTM_N;
264 alpha_tp = ALPHA_TP_INSN;
265 alpha_fptm = ALPHA_FPTM_SU;
267 if (TARGET_IEEE_WITH_INEXACT)
269 alpha_tp = ALPHA_TP_INSN;
270 alpha_fptm = ALPHA_FPTM_SUI;
275 if (! strcmp (alpha_tp_string, "p"))
276 alpha_tp = ALPHA_TP_PROG;
277 else if (! strcmp (alpha_tp_string, "f"))
278 alpha_tp = ALPHA_TP_FUNC;
279 else if (! strcmp (alpha_tp_string, "i"))
280 alpha_tp = ALPHA_TP_INSN;
282 error ("bad value %qs for -mtrap-precision switch", alpha_tp_string);
285 if (alpha_fprm_string)
287 if (! strcmp (alpha_fprm_string, "n"))
288 alpha_fprm = ALPHA_FPRM_NORM;
289 else if (! strcmp (alpha_fprm_string, "m"))
290 alpha_fprm = ALPHA_FPRM_MINF;
291 else if (! strcmp (alpha_fprm_string, "c"))
292 alpha_fprm = ALPHA_FPRM_CHOP;
293 else if (! strcmp (alpha_fprm_string, "d"))
294 alpha_fprm = ALPHA_FPRM_DYN;
296 error ("bad value %qs for -mfp-rounding-mode switch",
300 if (alpha_fptm_string)
302 if (strcmp (alpha_fptm_string, "n") == 0)
303 alpha_fptm = ALPHA_FPTM_N;
304 else if (strcmp (alpha_fptm_string, "u") == 0)
305 alpha_fptm = ALPHA_FPTM_U;
306 else if (strcmp (alpha_fptm_string, "su") == 0)
307 alpha_fptm = ALPHA_FPTM_SU;
308 else if (strcmp (alpha_fptm_string, "sui") == 0)
309 alpha_fptm = ALPHA_FPTM_SUI;
311 error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string);
314 if (alpha_cpu_string)
316 for (i = 0; i < ct_size; i++)
317 if (! strcmp (alpha_cpu_string, cpu_table [i].name))
319 alpha_tune = alpha_cpu = cpu_table [i].processor;
320 target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
321 target_flags |= cpu_table [i].flags;
325 error ("bad value %qs for -mcpu switch", alpha_cpu_string);
328 if (alpha_tune_string)
330 for (i = 0; i < ct_size; i++)
331 if (! strcmp (alpha_tune_string, cpu_table [i].name))
333 alpha_tune = cpu_table [i].processor;
337 error ("bad value %qs for -mtune switch", alpha_tune_string);
340 /* Do some sanity checks on the above options. */
342 if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
343 && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
345 warning (0, "fp software completion requires -mtrap-precision=i");
346 alpha_tp = ALPHA_TP_INSN;
349 if (alpha_cpu == PROCESSOR_EV6)
351 /* Except for EV6 pass 1 (not released), we always have precise
352 arithmetic traps. Which means we can do software completion
353 without minding trap shadows. */
354 alpha_tp = ALPHA_TP_PROG;
357 if (TARGET_FLOAT_VAX)
359 if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
361 warning (0, "rounding mode not supported for VAX floats");
362 alpha_fprm = ALPHA_FPRM_NORM;
364 if (alpha_fptm == ALPHA_FPTM_SUI)
366 warning (0, "trap mode not supported for VAX floats");
367 alpha_fptm = ALPHA_FPTM_SU;
369 if (target_flags_explicit & MASK_LONG_DOUBLE_128)
370 warning (0, "128-bit long double not supported for VAX floats");
371 target_flags &= ~MASK_LONG_DOUBLE_128;
378 if (!alpha_mlat_string)
379 alpha_mlat_string = "L1";
381 if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
382 && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
384 else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
385 && ISDIGIT ((unsigned char)alpha_mlat_string[1])
386 && alpha_mlat_string[2] == '\0')
388 static int const cache_latency[][4] =
390 { 3, 30, -1 }, /* ev4 -- Bcache is a guess */
391 { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
392 { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */
395 lat = alpha_mlat_string[1] - '0';
396 if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
398 warning (0, "L%d cache latency unknown for %s",
399 lat, alpha_cpu_name[alpha_tune]);
403 lat = cache_latency[alpha_tune][lat-1];
405 else if (! strcmp (alpha_mlat_string, "main"))
407 /* Most current memories have about 370ns latency. This is
408 a reasonable guess for a fast cpu. */
413 warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string);
417 alpha_memory_latency = lat;
420 /* Default the definition of "small data" to 8 bytes. */
421 if (!global_options_set.x_g_switch_value)
424 /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */
426 target_flags |= MASK_SMALL_DATA;
427 else if (flag_pic == 2)
428 target_flags &= ~MASK_SMALL_DATA;
430 /* Align labels and loops for optimal branching. */
431 /* ??? Kludge these by not doing anything if we don't optimize and also if
432 we are writing ECOFF symbols to work around a bug in DEC's assembler. */
433 if (optimize > 0 && write_symbols != SDB_DEBUG)
435 if (align_loops <= 0)
437 if (align_jumps <= 0)
440 if (align_functions <= 0)
441 align_functions = 16;
443 /* Register variables and functions with the garbage collector. */
445 /* Set up function hooks. */
446 init_machine_status = alpha_init_machine_status;
448 /* Tell the compiler when we're using VAX floating point. */
449 if (TARGET_FLOAT_VAX)
451 REAL_MODE_FORMAT (SFmode) = &vax_f_format;
452 REAL_MODE_FORMAT (DFmode) = &vax_g_format;
453 REAL_MODE_FORMAT (TFmode) = NULL;
456 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
457 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
458 target_flags |= MASK_LONG_DOUBLE_128;
462 /* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */
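/* For illustration (arbitrary example values): 0x00000000ffffffff and
   0xff00ff00ff00ff00 are masks this predicate accepts, since every byte
   is either 0x00 or 0xff; 0x00000000ffff0f00 is rejected because the
   byte 0x0f is only partially set.  These are exactly the AND masks
   that a single ZAP or ZAPNOT byte mask can implement.  */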
465 zap_mask (HOST_WIDE_INT value)
469 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
471 if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
477 /* Return true if OP is valid for a particular TLS relocation.
478 We are already guaranteed that OP is a CONST. */
481 tls_symbolic_operand_1 (rtx op, int size, int unspec)
485 if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
487 op = XVECEXP (op, 0, 0);
489 if (GET_CODE (op) != SYMBOL_REF)
492 switch (SYMBOL_REF_TLS_MODEL (op))
494 case TLS_MODEL_LOCAL_DYNAMIC:
495 return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
496 case TLS_MODEL_INITIAL_EXEC:
497 return unspec == UNSPEC_TPREL && size == 64;
498 case TLS_MODEL_LOCAL_EXEC:
499 return unspec == UNSPEC_TPREL && size == alpha_tls_size;
505 /* Used by aligned_memory_operand and unaligned_memory_operand to
506 resolve what reload is going to do with OP if it's a register. */
509 resolve_reload_operand (rtx op)
511 if (reload_in_progress)
514 if (GET_CODE (tmp) == SUBREG)
515 tmp = SUBREG_REG (tmp);
517 && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
519 op = reg_equiv_memory_loc (REGNO (tmp));
527 /* The scalar modes supported differ from the default check-what-c-supports
528 version in that sometimes TFmode is available even when long double
529 indicates only DFmode. */
532 alpha_scalar_mode_supported_p (enum machine_mode mode)
540 case TImode: /* via optabs.c */
548 return TARGET_HAS_XFLOATING_LIBS;
555 /* Alpha implements a couple of integer vector mode operations when
556 TARGET_MAX is enabled. We do not check TARGET_MAX here, however,
557 which allows the vectorizer to operate on e.g. move instructions,
558 or when expand_vector_operations can do something useful. */
561 alpha_vector_mode_supported_p (enum machine_mode mode)
563 return mode == V8QImode || mode == V4HImode || mode == V2SImode;
566 /* Return 1 if this function can directly return via $26. */
571 return (TARGET_ABI_OSF
573 && alpha_sa_size () == 0
574 && get_frame_size () == 0
575 && crtl->outgoing_args_size == 0
576 && crtl->args.pretend_args_size == 0);
579 /* Return the TLS model to use for SYMBOL. */
581 static enum tls_model
582 tls_symbolic_operand_type (rtx symbol)
584 enum tls_model model;
586 if (GET_CODE (symbol) != SYMBOL_REF)
587 return TLS_MODEL_NONE;
588 model = SYMBOL_REF_TLS_MODEL (symbol);
590 /* Local-exec with a 64-bit size is the same code as initial-exec. */
591 if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
592 model = TLS_MODEL_INITIAL_EXEC;
597 /* Return true if the function DECL will share the same GP as any
598 function in the current unit of translation. */
601 decl_has_samegp (const_tree decl)
603 /* Functions that are not local can be overridden, and thus may
604 not share the same gp. */
605 if (!(*targetm.binds_local_p) (decl))
608 /* If -msmall-data is in effect, assume that there is only one GP
609 for the module, and so any local symbol has this property. We
610 need explicit relocations to be able to enforce this for symbols
611 not defined in this unit of translation, however. */
612 if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
615 /* Functions that are not external are defined in this UoT. */
616 /* ??? Irritatingly, static functions not yet emitted are still
617 marked "external". Apply this to non-static functions only. */
618 return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
621 /* Return true if EXP should be placed in the small data section. */
624 alpha_in_small_data_p (const_tree exp)
626 /* We want to merge strings, so we never consider them small data. */
627 if (TREE_CODE (exp) == STRING_CST)
630 /* Functions are never in the small data area. Duh. */
631 if (TREE_CODE (exp) == FUNCTION_DECL)
634 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
636 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
637 if (strcmp (section, ".sdata") == 0
638 || strcmp (section, ".sbss") == 0)
643 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
645 /* If this is an incomplete type with size 0, then we can't put it
646 in sdata because it might be too big when completed. */
647 if (size > 0 && size <= g_switch_value)
654 #if TARGET_ABI_OPEN_VMS
656 vms_valid_pointer_mode (enum machine_mode mode)
658 return (mode == SImode || mode == DImode);
662 alpha_linkage_symbol_p (const char *symname)
664 int symlen = strlen (symname);
667 return strcmp (&symname [symlen - 4], "..lk") == 0;
672 #define LINKAGE_SYMBOL_REF_P(X) \
673 ((GET_CODE (X) == SYMBOL_REF \
674 && alpha_linkage_symbol_p (XSTR (X, 0))) \
675 || (GET_CODE (X) == CONST \
676 && GET_CODE (XEXP (X, 0)) == PLUS \
677 && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
678 && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
681 /* legitimate_address_p recognizes an RTL expression that is a valid
682 memory address for an instruction. The MODE argument is the
683 machine mode for the MEM expression that wants to use this address.
685 For Alpha, we have either a constant address or the sum of a
686 register and a constant address, or just a register. For DImode,
687 any of those forms can be surrounded with an AND that clears the
688 low-order three bits; this is an "unaligned" access. */
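/* For illustration (register and offset values are arbitrary):

     (reg:DI 16)                                plain base register
     (plus:DI (reg:DI 16) (const_int 64))       base plus small displacement
     (and:DI (plus:DI (reg:DI 16) (const_int 5))
             (const_int -8))                    DImode "unaligned" form; the
                                                AND clears the low three bits
                                                for an ldq_u access.  */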
691 alpha_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
693 /* If this is an ldq_u type address, discard the outer AND. */
695 && GET_CODE (x) == AND
696 && CONST_INT_P (XEXP (x, 1))
697 && INTVAL (XEXP (x, 1)) == -8)
700 /* Discard non-paradoxical subregs. */
701 if (GET_CODE (x) == SUBREG
702 && (GET_MODE_SIZE (GET_MODE (x))
703 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
706 /* Unadorned general registers are valid. */
709 ? STRICT_REG_OK_FOR_BASE_P (x)
710 : NONSTRICT_REG_OK_FOR_BASE_P (x)))
713 /* Constant addresses (i.e. +/- 32k) are valid. */
714 if (CONSTANT_ADDRESS_P (x))
717 #if TARGET_ABI_OPEN_VMS
718 if (LINKAGE_SYMBOL_REF_P (x))
722 /* Register plus a small constant offset is valid. */
723 if (GET_CODE (x) == PLUS)
725 rtx ofs = XEXP (x, 1);
728 /* Discard non-paradoxical subregs. */
729 if (GET_CODE (x) == SUBREG
730 && (GET_MODE_SIZE (GET_MODE (x))
731 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
737 && NONSTRICT_REG_OK_FP_BASE_P (x)
738 && CONST_INT_P (ofs))
741 ? STRICT_REG_OK_FOR_BASE_P (x)
742 : NONSTRICT_REG_OK_FOR_BASE_P (x))
743 && CONSTANT_ADDRESS_P (ofs))
748 /* If we're managing explicit relocations, LO_SUM is valid, as are small
749 data symbols. Avoid explicit relocations of modes larger than word
750 mode since e.g. $LC0+8($1) can fold around +/- 32k offset.  */
751 else if (TARGET_EXPLICIT_RELOCS
752 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
754 if (small_symbolic_operand (x, Pmode))
757 if (GET_CODE (x) == LO_SUM)
759 rtx ofs = XEXP (x, 1);
762 /* Discard non-paradoxical subregs. */
763 if (GET_CODE (x) == SUBREG
764 && (GET_MODE_SIZE (GET_MODE (x))
765 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
768 /* Must have a valid base register. */
771 ? STRICT_REG_OK_FOR_BASE_P (x)
772 : NONSTRICT_REG_OK_FOR_BASE_P (x))))
775 /* The symbol must be local. */
776 if (local_symbolic_operand (ofs, Pmode)
777 || dtp32_symbolic_operand (ofs, Pmode)
778 || tp32_symbolic_operand (ofs, Pmode))
786 /* Build the SYMBOL_REF for __tls_get_addr. */
788 static GTY(()) rtx tls_get_addr_libfunc;
791 get_tls_get_addr (void)
793 if (!tls_get_addr_libfunc)
794 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
795 return tls_get_addr_libfunc;
798 /* Try machine-dependent ways of modifying an illegitimate address
799 to be legitimate. If we find one, return the new, valid address. */
802 alpha_legitimize_address_1 (rtx x, rtx scratch, enum machine_mode mode)
804 HOST_WIDE_INT addend;
806 /* If the address is (plus reg const_int) and the CONST_INT is not a
807 valid offset, compute the high part of the constant and add it to
808 the register. Then our address is (plus temp low-part-const). */
809 if (GET_CODE (x) == PLUS
810 && REG_P (XEXP (x, 0))
811 && CONST_INT_P (XEXP (x, 1))
812 && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
814 addend = INTVAL (XEXP (x, 1));
819 /* If the address is (const (plus FOO const_int)), find the low-order
820 part of the CONST_INT. Then load FOO plus any high-order part of the
821 CONST_INT into a register. Our address is (plus reg low-part-const).
822 This is done to reduce the number of GOT entries. */
823 if (can_create_pseudo_p ()
824 && GET_CODE (x) == CONST
825 && GET_CODE (XEXP (x, 0)) == PLUS
826 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
828 addend = INTVAL (XEXP (XEXP (x, 0), 1));
829 x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
833 /* If we have a (plus reg const), emit the load as in (2), then add
834 the two registers, and finally generate (plus reg low-part-const) as
835 our address.  */
836 if (can_create_pseudo_p ()
837 && GET_CODE (x) == PLUS
838 && REG_P (XEXP (x, 0))
839 && GET_CODE (XEXP (x, 1)) == CONST
840 && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
841 && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
843 addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
844 x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
845 XEXP (XEXP (XEXP (x, 1), 0), 0),
846 NULL_RTX, 1, OPTAB_LIB_WIDEN);
850 /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
851 Avoid modes larger than word mode since e.g. $LC0+8($1) can fold
852 around +/- 32k offset. */
853 if (TARGET_EXPLICIT_RELOCS
854 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
855 && symbolic_operand (x, Pmode))
857 rtx r0, r16, eqv, tga, tp, insn, dest, seq;
859 switch (tls_symbolic_operand_type (x))
864 case TLS_MODEL_GLOBAL_DYNAMIC:
867 r0 = gen_rtx_REG (Pmode, 0);
868 r16 = gen_rtx_REG (Pmode, 16);
869 tga = get_tls_get_addr ();
870 dest = gen_reg_rtx (Pmode);
871 seq = GEN_INT (alpha_next_sequence_number++);
873 emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
874 insn = gen_call_value_osf_tlsgd (r0, tga, seq);
875 insn = emit_call_insn (insn);
876 RTL_CONST_CALL_P (insn) = 1;
877 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
882 emit_libcall_block (insn, dest, r0, x);
885 case TLS_MODEL_LOCAL_DYNAMIC:
888 r0 = gen_rtx_REG (Pmode, 0);
889 r16 = gen_rtx_REG (Pmode, 16);
890 tga = get_tls_get_addr ();
891 scratch = gen_reg_rtx (Pmode);
892 seq = GEN_INT (alpha_next_sequence_number++);
894 emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
895 insn = gen_call_value_osf_tlsldm (r0, tga, seq);
896 insn = emit_call_insn (insn);
897 RTL_CONST_CALL_P (insn) = 1;
898 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
903 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
905 emit_libcall_block (insn, scratch, r0, eqv);
907 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
908 eqv = gen_rtx_CONST (Pmode, eqv);
910 if (alpha_tls_size == 64)
912 dest = gen_reg_rtx (Pmode);
913 emit_insn (gen_rtx_SET (VOIDmode, dest, eqv));
914 emit_insn (gen_adddi3 (dest, dest, scratch));
917 if (alpha_tls_size == 32)
919 insn = gen_rtx_HIGH (Pmode, eqv);
920 insn = gen_rtx_PLUS (Pmode, scratch, insn);
921 scratch = gen_reg_rtx (Pmode);
922 emit_insn (gen_rtx_SET (VOIDmode, scratch, insn));
924 return gen_rtx_LO_SUM (Pmode, scratch, eqv);
926 case TLS_MODEL_INITIAL_EXEC:
927 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
928 eqv = gen_rtx_CONST (Pmode, eqv);
929 tp = gen_reg_rtx (Pmode);
930 scratch = gen_reg_rtx (Pmode);
931 dest = gen_reg_rtx (Pmode);
933 emit_insn (gen_load_tp (tp));
934 emit_insn (gen_rtx_SET (VOIDmode, scratch, eqv));
935 emit_insn (gen_adddi3 (dest, tp, scratch));
938 case TLS_MODEL_LOCAL_EXEC:
939 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
940 eqv = gen_rtx_CONST (Pmode, eqv);
941 tp = gen_reg_rtx (Pmode);
943 emit_insn (gen_load_tp (tp));
944 if (alpha_tls_size == 32)
946 insn = gen_rtx_HIGH (Pmode, eqv);
947 insn = gen_rtx_PLUS (Pmode, tp, insn);
948 tp = gen_reg_rtx (Pmode);
949 emit_insn (gen_rtx_SET (VOIDmode, tp, insn));
951 return gen_rtx_LO_SUM (Pmode, tp, eqv);
957 if (local_symbolic_operand (x, Pmode))
959 if (small_symbolic_operand (x, Pmode))
963 if (can_create_pseudo_p ())
964 scratch = gen_reg_rtx (Pmode);
965 emit_insn (gen_rtx_SET (VOIDmode, scratch,
966 gen_rtx_HIGH (Pmode, x)));
967 return gen_rtx_LO_SUM (Pmode, scratch, x);
976 HOST_WIDE_INT low, high;
978 low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
980 high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
984 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
985 (!can_create_pseudo_p () ? scratch : NULL_RTX),
988 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
989 (!can_create_pseudo_p () ? scratch : NULL_RTX),
992 return plus_constant (x, low);
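/* Worked example of the low/high split above (arbitrary addend):

     addend = 0x12349000
     low    = ((0x9000 ^ 0x8000) - 0x8000) = -0x7000   sign-extended 16 bits
     addend - low = 0x12350000                          added to the register

   so the returned address is (plus tmp -0x7000), whose offset fits the
   16-bit displacement field, while the 0x12350000 part is built with the
   ldah-style adds above.  */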
997 /* Try machine-dependent ways of modifying an illegitimate address
998 to be legitimate. Return X or the new, valid address. */
1001 alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1002 enum machine_mode mode)
1004 rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
1005 return new_x ? new_x : x;
1008 /* Primarily this is required for TLS symbols, but given that our move
1009 patterns *ought* to be able to handle any symbol at any time, we
1010 should never be spilling symbolic operands to the constant pool, ever. */
1013 alpha_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1015 enum rtx_code code = GET_CODE (x);
1016 return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
1019 /* We do not allow indirect calls to be optimized into sibling calls, nor
1020 can we allow a call to a function with a different GP to be optimized
1021 into a sibcall.  */
1024 alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1026 /* Can't do indirect tail calls, since we don't know if the target
1027 uses the same GP. */
1031 /* Otherwise, we can make a tail call if the target function shares
1032 the same GP.  */
1033 return decl_has_samegp (decl);
1037 some_small_symbolic_operand_int (rtx *px, void *data ATTRIBUTE_UNUSED)
1041 /* Don't re-split. */
1042 if (GET_CODE (x) == LO_SUM)
1045 return small_symbolic_operand (x, Pmode) != 0;
1049 split_small_symbolic_operand_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
1053 /* Don't re-split. */
1054 if (GET_CODE (x) == LO_SUM)
1057 if (small_symbolic_operand (x, Pmode))
1059 x = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
1068 split_small_symbolic_operand (rtx x)
1071 for_each_rtx (&x, split_small_symbolic_operand_1, NULL);
1075 /* Indicate that INSN cannot be duplicated. This is true for any insn
1076 that we've marked with gpdisp relocs, since those have to stay in
1077 1-1 correspondence with one another.
1079 Technically we could copy them if we could set up a mapping from one
1080 sequence number to another, across the set of insns to be duplicated.
1081 This seems overly complicated and error-prone since interblock motion
1082 from sched-ebb could move one of the pair of insns to a different block.
1084 Also cannot allow jsr insns to be duplicated. If they throw exceptions,
1085 then they'll be in a different block from their ldgp. Which could lead
1086 the bb reorder code to think that it would be ok to copy just the block
1087 containing the call and branch to the block containing the ldgp. */
1090 alpha_cannot_copy_insn_p (rtx insn)
1092 if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
1094 if (recog_memoized (insn) >= 0)
1095 return get_attr_cannot_copy (insn);
1101 /* Try a machine-dependent way of reloading an illegitimate address
1102 operand. If we find one, push the reload and return the new rtx. */
1105 alpha_legitimize_reload_address (rtx x,
1106 enum machine_mode mode ATTRIBUTE_UNUSED,
1107 int opnum, int type,
1108 int ind_levels ATTRIBUTE_UNUSED)
1110 /* We must recognize output that we have already generated ourselves. */
1111 if (GET_CODE (x) == PLUS
1112 && GET_CODE (XEXP (x, 0)) == PLUS
1113 && REG_P (XEXP (XEXP (x, 0), 0))
1114 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
1115 && CONST_INT_P (XEXP (x, 1)))
1117 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1118 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1119 opnum, (enum reload_type) type);
1123 /* We wish to handle large displacements off a base register by
1124 splitting the addend across an ldah and the mem insn. This
1125 cuts the number of extra insns needed from 3 to 1.  */
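/* Worked example (arbitrary displacement): for (plus reg 0x8004) the
   constant exceeds the signed 16-bit displacement range, so it is split as

     low  = ((0x8004 ^ 0x8000) - 0x8000) = -0x7ffc
     high = 0x8004 - low                 =  0x10000

   giving (plus (plus reg 0x10000) -0x7ffc); the inner PLUS is reloaded
   into a base register with a single ldah and -0x7ffc remains as the
   memory displacement.  */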
1126 if (GET_CODE (x) == PLUS
1127 && REG_P (XEXP (x, 0))
1128 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
1129 && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
1130 && GET_CODE (XEXP (x, 1)) == CONST_INT)
1132 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
1133 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
1135 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
1137 /* Check for 32-bit overflow. */
1138 if (high + low != val)
1141 /* Reload the high part into a base reg; leave the low part
1142 in the mem directly. */
1143 x = gen_rtx_PLUS (GET_MODE (x),
1144 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
1148 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1149 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1150 opnum, (enum reload_type) type);
1157 /* Compute a (partial) cost for rtx X. Return true if the complete
1158 cost has been computed, and false if subexpressions should be
1159 scanned. In either case, *TOTAL contains the cost result. */
1162 alpha_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
1165 enum machine_mode mode = GET_MODE (x);
1166 bool float_mode_p = FLOAT_MODE_P (mode);
1167 const struct alpha_rtx_cost_data *cost_data;
1170 cost_data = &alpha_rtx_cost_size;
1172 cost_data = &alpha_rtx_cost_data[alpha_tune];
1177 /* If this is an 8-bit constant, return zero since it can be used
1178 nearly anywhere with no cost. If it is a valid operand for an
1179 ADD or AND, likewise return 0 if we know it will be used in that
1180 context. Otherwise, return 2 since it might be used there later.
1181 All other constants take at least two insns. */
1182 if (INTVAL (x) >= 0 && INTVAL (x) < 256)
1190 if (x == CONST0_RTX (mode))
1192 else if ((outer_code == PLUS && add_operand (x, VOIDmode))
1193 || (outer_code == AND && and_operand (x, VOIDmode)))
1195 else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
1198 *total = COSTS_N_INSNS (2);
1204 if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
1205 *total = COSTS_N_INSNS (outer_code != MEM);
1206 else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
1207 *total = COSTS_N_INSNS (1 + (outer_code != MEM));
1208 else if (tls_symbolic_operand_type (x))
1209 /* Estimate of cost for call_pal rduniq. */
1210 /* ??? How many insns do we emit here? More than one... */
1211 *total = COSTS_N_INSNS (15);
1213 /* Otherwise we do a load from the GOT. */
1214 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1218 /* This is effectively an add_operand. */
1225 *total = cost_data->fp_add;
1226 else if (GET_CODE (XEXP (x, 0)) == MULT
1227 && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
1229 *total = (rtx_cost (XEXP (XEXP (x, 0), 0),
1230 (enum rtx_code) outer_code, opno, speed)
1231 + rtx_cost (XEXP (x, 1),
1232 (enum rtx_code) outer_code, opno, speed)
1233 + COSTS_N_INSNS (1));
1240 *total = cost_data->fp_mult;
1241 else if (mode == DImode)
1242 *total = cost_data->int_mult_di;
1244 *total = cost_data->int_mult_si;
1248 if (CONST_INT_P (XEXP (x, 1))
1249 && INTVAL (XEXP (x, 1)) <= 3)
1251 *total = COSTS_N_INSNS (1);
1258 *total = cost_data->int_shift;
1263 *total = cost_data->fp_add;
1265 *total = cost_data->int_cmov;
1273 *total = cost_data->int_div;
1274 else if (mode == SFmode)
1275 *total = cost_data->fp_div_sf;
1277 *total = cost_data->fp_div_df;
1281 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1287 *total = COSTS_N_INSNS (1);
1295 *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
1301 case UNSIGNED_FLOAT:
1304 case FLOAT_TRUNCATE:
1305 *total = cost_data->fp_add;
1309 if (MEM_P (XEXP (x, 0)))
1312 *total = cost_data->fp_add;
1320 /* REF is an alignable memory location. Place an aligned SImode
1321 reference into *PALIGNED_MEM and the number of bits to shift into
1322 *PBITNUM.  */
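/* For illustration (arbitrary layout): a QImode reference whose byte
   offset within its enclosing aligned word is 3 yields offset = 3, so
   *PALIGNED_MEM is widened to cover the whole SImode word and *PBITNUM
   is 24, i.e. the byte of interest occupies bits 24..31 of that word.  */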
1326 get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
1329 HOST_WIDE_INT disp, offset;
1331 gcc_assert (MEM_P (ref));
1333 if (reload_in_progress
1334 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1336 base = find_replacement (&XEXP (ref, 0));
1337 gcc_assert (memory_address_p (GET_MODE (ref), base));
1340 base = XEXP (ref, 0);
1342 if (GET_CODE (base) == PLUS)
1343 disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1347 /* Find the byte offset within an aligned word. If the memory itself is
1348 claimed to be aligned, believe it. Otherwise, aligned_memory_operand
1349 will have examined the base register and determined it is aligned, and
1350 thus displacements from it are naturally alignable. */
1351 if (MEM_ALIGN (ref) >= 32)
1356 /* The location should not cross an aligned word boundary. */
1357 gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref))
1358 <= GET_MODE_SIZE (SImode));
1360 /* Access the entire aligned word. */
1361 *paligned_mem = widen_memory_access (ref, SImode, -offset);
1363 /* Convert the byte offset within the word to a bit offset. */
1364 offset *= BITS_PER_UNIT;
1365 *pbitnum = GEN_INT (offset);
1368 /* Similar, but just get the address. Handle the two reload cases. */
1372 get_unaligned_address (rtx ref)
1375 HOST_WIDE_INT offset = 0;
1377 gcc_assert (MEM_P (ref));
1379 if (reload_in_progress
1380 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1382 base = find_replacement (&XEXP (ref, 0));
1384 gcc_assert (memory_address_p (GET_MODE (ref), base));
1387 base = XEXP (ref, 0);
1389 if (GET_CODE (base) == PLUS)
1390 offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1392 return plus_constant (base, offset);
1395 /* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
1396 X is always returned in a register. */
1399 get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
1401 if (GET_CODE (addr) == PLUS)
1403 ofs += INTVAL (XEXP (addr, 1));
1404 addr = XEXP (addr, 0);
1407 return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
1408 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1411 /* On the Alpha, all (non-symbolic) constants except zero go into
1412 a floating-point register via memory. Note that we cannot
1413 return anything that is not a subset of RCLASS, and that some
1414 symbolic constants cannot be dropped to memory. */
1417 alpha_preferred_reload_class(rtx x, enum reg_class rclass)
1419 /* Zero is present in any register class. */
1420 if (x == CONST0_RTX (GET_MODE (x)))
1423 /* These sorts of constants we can easily drop to memory. */
1425 || GET_CODE (x) == CONST_DOUBLE
1426 || GET_CODE (x) == CONST_VECTOR)
1428 if (rclass == FLOAT_REGS)
1430 if (rclass == ALL_REGS)
1431 return GENERAL_REGS;
1435 /* All other kinds of constants should not (and in the case of HIGH
1436 cannot) be dropped to memory -- instead we use a GENERAL_REGS
1437 secondary reload. */
1439 return (rclass == ALL_REGS ? GENERAL_REGS : rclass);
1444 /* Inform reload about cases where moving X with a mode MODE to a register in
1445 RCLASS requires an extra scratch or immediate register. Return the class
1446 needed for the immediate register. */
1449 alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
1450 enum machine_mode mode, secondary_reload_info *sri)
1452 enum reg_class rclass = (enum reg_class) rclass_i;
1454 /* Loading and storing HImode or QImode values to and from memory
1455 usually requires a scratch register. */
1456 if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
1458 if (any_memory_operand (x, mode))
1462 if (!aligned_memory_operand (x, mode))
1463 sri->icode = direct_optab_handler (reload_in_optab, mode);
1466 sri->icode = direct_optab_handler (reload_out_optab, mode);
1471 /* We also cannot do integral arithmetic into FP regs, as might result
1472 from register elimination into a DImode fp register. */
1473 if (rclass == FLOAT_REGS)
1475 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
1476 return GENERAL_REGS;
1477 if (in_p && INTEGRAL_MODE_P (mode)
1478 && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
1479 return GENERAL_REGS;
1485 /* Subfunction of the following function. Update the flags of any MEM
1486 found in part of X. */
1489 alpha_set_memflags_1 (rtx *xp, void *data)
1491 rtx x = *xp, orig = (rtx) data;
1496 MEM_VOLATILE_P (x) = MEM_VOLATILE_P (orig);
1497 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (orig);
1498 MEM_READONLY_P (x) = MEM_READONLY_P (orig);
1500 /* Sadly, we cannot use alias sets because the extra aliasing
1501 produced by the AND interferes. Given that two-byte quantities
1502 are the only thing we would be able to differentiate anyway,
1503 there does not seem to be any point in convoluting the early
1504 out of the alias check. */
1509 /* Given SEQ, which is an INSN list, look for any MEMs in either
1510 a SET_DEST or a SET_SRC and copy the MEM_VOLATILE_P, MEM_NOTRAP_P, and
1511 MEM_READONLY_P flags from REF into each of the MEMs found. If REF is not
1512 a MEM, don't do anything. */
1515 alpha_set_memflags (rtx seq, rtx ref)
1522 /* This is only called from alpha.md, after having had something
1523 generated from one of the insn patterns. So if everything is
1524 zero, the pattern is already up-to-date. */
1525 if (!MEM_VOLATILE_P (ref)
1526 && !MEM_NOTRAP_P (ref)
1527 && !MEM_READONLY_P (ref))
1530 for (insn = seq; insn; insn = NEXT_INSN (insn))
1532 for_each_rtx (&PATTERN (insn), alpha_set_memflags_1, (void *) ref);
1537 static rtx alpha_emit_set_const (rtx, enum machine_mode, HOST_WIDE_INT,
1540 /* Internal routine for alpha_emit_set_const to check for N or below insns.
1541 If NO_OUTPUT is true, then we only check to see if N insns are possible,
1542 and return pc_rtx if successful. */
1545 alpha_emit_set_const_1 (rtx target, enum machine_mode mode,
1546 HOST_WIDE_INT c, int n, bool no_output)
1548 HOST_WIDE_INT new_const;
1550 /* Use a pseudo if highly optimizing and still generating RTL. */
1552 = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
1555 /* If this is a sign-extended 32-bit constant, we can do this in at most
1556 three insns, so do it if we have enough insns left. We always have
1557 a sign-extended 32-bit constant when compiling on a narrow machine. */
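/* Worked example (arbitrary constant): c = 0x12348000 splits into

     low  = ((0x8000 ^ 0x8000) - 0x8000) = -0x8000
     high = (c - low) >> 16              =  0x1235

   so it can be built as "ldah reg,0x1235($31); lda reg,-0x8000(reg)".
   When HIGH would come out negative while C itself is positive, the
   EXTRA adjustment below folds in an additional ldah so the 32-bit sum
   still matches.  */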
1559 if (HOST_BITS_PER_WIDE_INT != 64
1560 || c >> 31 == -1 || c >> 31 == 0)
1562 HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
1563 HOST_WIDE_INT tmp1 = c - low;
1564 HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
1565 HOST_WIDE_INT extra = 0;
1567 /* If HIGH will be interpreted as negative but the constant is
1568 positive, we must adjust it to do two ldah insns.
1570 if ((high & 0x8000) != 0 && c >= 0)
1574 high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
1577 if (c == low || (low == 0 && extra == 0))
1579 /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
1580 but that meant that we can't handle INT_MIN on 32-bit machines
1581 (like NT/Alpha), because we recurse indefinitely through
1582 emit_move_insn to gen_movdi. So instead, since we know exactly
1583 what we want, create it explicitly. */
1588 target = gen_reg_rtx (mode);
1589 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (c)));
1592 else if (n >= 2 + (extra != 0))
1596 if (!can_create_pseudo_p ())
1598 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (high << 16)));
1602 temp = copy_to_suggested_reg (GEN_INT (high << 16),
1605 /* As of 2002-02-23, addsi3 is only available when not optimizing.
1606 This means that if we go through expand_binop, we'll try to
1607 generate extensions, etc, which will require new pseudos, which
1608 will fail during some split phases. The SImode add patterns
1609 still exist, but are not named. So build the insns by hand. */
1614 subtarget = gen_reg_rtx (mode);
1615 insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
1616 insn = gen_rtx_SET (VOIDmode, subtarget, insn);
1622 target = gen_reg_rtx (mode);
1623 insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1624 insn = gen_rtx_SET (VOIDmode, target, insn);
1630 /* If we couldn't do it that way, try some other methods. But if we have
1631 no instructions left, don't bother. Likewise, if this is SImode and
1632 we can't make pseudos, we can't do anything since the expand_binop
1633 and expand_unop calls will widen and try to make pseudos. */
1635 if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
1638 /* Next, see if we can load a related constant and then shift and possibly
1639 negate it to get the constant we want. Try this once with each
1640 increasing number of insns.  */
1642 for (i = 1; i < n; i++)
1644 /* First, see if, minus some low bits, we have an easy load of
1645 the difference.  */
1647 new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
1650 temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
1655 return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
1656 target, 0, OPTAB_WIDEN);
1660 /* Next try complementing. */
1661 temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
1666 return expand_unop (mode, one_cmpl_optab, temp, target, 0);
1669 /* Next try to form a constant and do a left shift. We can do this
1670 if some low-order bits are zero; the exact_log2 call below tells
1671 us that information. The bits we are shifting out could be any
1672 value, but here we'll just try the 0- and sign-extended forms of
1673 the constant. To try to increase the chance of having the same
1674 constant in more than one insn, start at the highest number of
1675 bits to shift, but try all possibilities in case a ZAPNOT will
1676 be useful.  */
1678 bits = exact_log2 (c & -c);
1680 for (; bits > 0; bits--)
1682 new_const = c >> bits;
1683 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1686 new_const = (unsigned HOST_WIDE_INT)c >> bits;
1687 temp = alpha_emit_set_const (subtarget, mode, new_const,
1694 return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
1695 target, 0, OPTAB_WIDEN);
1699 /* Now try high-order zero bits. Here we try the shifted-in bits as
1700 all zero and all ones. Be careful to avoid shifting outside the
1701 mode and to avoid shifting outside the host wide int size. */
1702 /* On narrow hosts, don't shift a 1 into the high bit, since we'll
1703 confuse the recursive call and set all of the high 32 bits. */
1705 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1706 - floor_log2 (c) - 1 - (HOST_BITS_PER_WIDE_INT < 64));
1708 for (; bits > 0; bits--)
1710 new_const = c << bits;
1711 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1714 new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1715 temp = alpha_emit_set_const (subtarget, mode, new_const,
1722 return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
1723 target, 1, OPTAB_WIDEN);
1727 /* Now try high-order 1 bits. We get that with a sign-extension.
1728 But one bit isn't enough here. Be careful to avoid shifting outside
1729 the mode and to avoid shifting outside the host wide int size. */
1731 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1732 - floor_log2 (~ c) - 2);
1734 for (; bits > 0; bits--)
1736 new_const = c << bits;
1737 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1740 new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1741 temp = alpha_emit_set_const (subtarget, mode, new_const,
1748 return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
1749 target, 0, OPTAB_WIDEN);
1754 #if HOST_BITS_PER_WIDE_INT == 64
1755 /* Finally, see if we can load a value into the target that is the same as the
1756 constant except that all bytes that are 0 are changed to be 0xff. If we
1757 can, then we can do a ZAPNOT to obtain the desired constant. */
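/* Worked example (arbitrary constant): c = 0xffff000000000000 has only
   zero bytes below the top two, so new_const becomes all-ones (-1),
   which loads in a single lda.  The AND mask c | ~new_const is then
   0xffff000000000000, a whole-byte mask, so the AND becomes a single
   ZAPNOT and the constant is built in two insns.  */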
1760 for (i = 0; i < 64; i += 8)
1761 if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
1762 new_const |= (HOST_WIDE_INT) 0xff << i;
1764 /* We are only called for SImode and DImode. If this is SImode, ensure that
1765 we are sign extended to a full word. */
1768 new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;
1772 temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
1777 return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
1778 target, 0, OPTAB_WIDEN);
1786 /* Try to output insns to set TARGET equal to the constant C if it can be
1787 done in less than N insns. Do all computations in MODE. Returns the place
1788 where the output has been placed if it can be done and the insns have been
1789 emitted. If it would take more than N insns, zero is returned and no
1790 insns are emitted.  */
1793 alpha_emit_set_const (rtx target, enum machine_mode mode,
1794 HOST_WIDE_INT c, int n, bool no_output)
1796 enum machine_mode orig_mode = mode;
1797 rtx orig_target = target;
1801 /* If we can't make any pseudos, TARGET is an SImode hard register, and we
1802 can't load this constant in one insn, do this in DImode. */
1803 if (!can_create_pseudo_p () && mode == SImode
1804 && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
1806 result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
1810 target = no_output ? NULL : gen_lowpart (DImode, target);
1813 else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
1815 target = no_output ? NULL : gen_lowpart (DImode, target);
1819 /* Try 1 insn, then 2, then up to N. */
1820 for (i = 1; i <= n; i++)
1822 result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
1830 insn = get_last_insn ();
1831 set = single_set (insn);
1832 if (! CONSTANT_P (SET_SRC (set)))
1833 set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
1838 /* Allow for the case where we changed the mode of TARGET. */
1841 if (result == target)
1842 result = orig_target;
1843 else if (mode != orig_mode)
1844 result = gen_lowpart (orig_mode, result);
1850 /* Having failed to find a 3 insn sequence in alpha_emit_set_const,
1851 fall back to a straightforward decomposition. We do this to avoid
1852 exponential run times encountered when looking for longer sequences
1853 with alpha_emit_set_const. */
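/* Worked example (arbitrary constant): c = 0x123456789abcdef0 decomposes
   below into

     d1 = -0x2110        d2 = -0x65430000
     d3 =  0x5679        d4 =  0x12340000

   and is rebuilt as ((d4 + d3) << 32) + d2 + d1, i.e. ldah/lda to form
   the high word, a 32-bit shift, then ldah/lda again for the low word:
   roughly five insns in the worst case.  */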
1856 alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
1858 HOST_WIDE_INT d1, d2, d3, d4;
1860 /* Decompose the entire word */
1861 #if HOST_BITS_PER_WIDE_INT >= 64
1862 gcc_assert (c2 == -(c1 < 0));
1863 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1865 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1866 c1 = (c1 - d2) >> 32;
1867 d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1869 d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1870 gcc_assert (c1 == d4);
1872 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1874 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1875 gcc_assert (c1 == d2);
1877 d3 = ((c2 & 0xffff) ^ 0x8000) - 0x8000;
1879 d4 = ((c2 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1880 gcc_assert (c2 == d4);
1883 /* Construct the high word */
1886 emit_move_insn (target, GEN_INT (d4));
1888 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
1891 emit_move_insn (target, GEN_INT (d3));
1893 /* Shift it into place */
1894 emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
1896 /* Add in the low bits. */
1898 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
1900 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
1905 /* Given an integral CONST_INT, CONST_DOUBLE, or CONST_VECTOR, return
1909 alpha_extract_integer (rtx x, HOST_WIDE_INT *p0, HOST_WIDE_INT *p1)
1911 HOST_WIDE_INT i0, i1;
1913 if (GET_CODE (x) == CONST_VECTOR)
1914 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
1917 if (CONST_INT_P (x))
1922 else if (HOST_BITS_PER_WIDE_INT >= 64)
1924 i0 = CONST_DOUBLE_LOW (x);
1929 i0 = CONST_DOUBLE_LOW (x);
1930 i1 = CONST_DOUBLE_HIGH (x);
1937 /* Implement TARGET_LEGITIMATE_CONSTANT_P. This is all constants for which
1938 we are willing to load the value into a register via a move pattern.
1939 Normally this is all symbolic constants, integral constants that
1940 take three or fewer instructions, and floating-point zero. */
1943 alpha_legitimate_constant_p (enum machine_mode mode, rtx x)
1945 HOST_WIDE_INT i0, i1;
1947 switch (GET_CODE (x))
1954 if (GET_CODE (XEXP (x, 0)) == PLUS
1955 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
1956 x = XEXP (XEXP (x, 0), 0);
1960 if (GET_CODE (x) != SYMBOL_REF)
1966 /* TLS symbols are never valid. */
1967 return SYMBOL_REF_TLS_MODEL (x) == 0;
1970 if (x == CONST0_RTX (mode))
1972 if (FLOAT_MODE_P (mode))
1977 if (x == CONST0_RTX (mode))
1979 if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
1981 if (GET_MODE_SIZE (mode) != 8)
1987 if (TARGET_BUILD_CONSTANTS)
1989 alpha_extract_integer (x, &i0, &i1);
1990 if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == (-i0 < 0))
1991 return alpha_emit_set_const_1 (x, mode, i0, 3, true) != NULL;
1999 /* Operand 1 is known to be a constant, and should require more than one
2000 instruction to load. Emit that multi-part load. */
2003 alpha_split_const_mov (enum machine_mode mode, rtx *operands)
2005 HOST_WIDE_INT i0, i1;
2006 rtx temp = NULL_RTX;
2008 alpha_extract_integer (operands[1], &i0, &i1);
2010 if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0))
2011 temp = alpha_emit_set_const (operands[0], mode, i0, 3, false);
2013 if (!temp && TARGET_BUILD_CONSTANTS)
2014 temp = alpha_emit_set_long_const (operands[0], i0, i1);
2018 if (!rtx_equal_p (operands[0], temp))
2019 emit_move_insn (operands[0], temp);
2026 /* Expand a move instruction; return true if all work is done.
2027 We don't handle non-bwx subword loads here. */
2030 alpha_expand_mov (enum machine_mode mode, rtx *operands)
2034 /* If the output is not a register, the input must be. */
2035 if (MEM_P (operands[0])
2036 && ! reg_or_0_operand (operands[1], mode))
2037 operands[1] = force_reg (mode, operands[1]);
2039 /* Allow legitimize_address to perform some simplifications. */
2040 if (mode == Pmode && symbolic_operand (operands[1], mode))
2042 tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode);
2045 if (tmp == operands[0])
2052 /* Early out for non-constants and valid constants. */
2053 if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode))
2056 /* Split large integers. */
2057 if (CONST_INT_P (operands[1])
2058 || GET_CODE (operands[1]) == CONST_DOUBLE
2059 || GET_CODE (operands[1]) == CONST_VECTOR)
2061 if (alpha_split_const_mov (mode, operands))
2065 /* Otherwise we've nothing left but to drop the thing to memory. */
2066 tmp = force_const_mem (mode, operands[1]);
2068 if (tmp == NULL_RTX)
2071 if (reload_in_progress)
2073 emit_move_insn (operands[0], XEXP (tmp, 0));
2074 operands[1] = replace_equiv_address (tmp, operands[0]);
2077 operands[1] = validize_mem (tmp);
2081 /* Expand a non-bwx QImode or HImode move instruction;
2082 return true if all work is done. */
2085 alpha_expand_mov_nobwx (enum machine_mode mode, rtx *operands)
2089 /* If the output is not a register, the input must be. */
2090 if (MEM_P (operands[0]))
2091 operands[1] = force_reg (mode, operands[1]);
2093 /* Handle four memory cases, unaligned and aligned for either the input
2094 or the output. The only case where we can be called during reload is
2095 for aligned loads; all other cases require temporaries. */
2097 if (any_memory_operand (operands[1], mode))
2099 if (aligned_memory_operand (operands[1], mode))
2101 if (reload_in_progress)
2104 seq = gen_reload_inqi_aligned (operands[0], operands[1]);
2106 seq = gen_reload_inhi_aligned (operands[0], operands[1]);
2111 rtx aligned_mem, bitnum;
2112 rtx scratch = gen_reg_rtx (SImode);
2116 get_aligned_mem (operands[1], &aligned_mem, &bitnum);
2118 subtarget = operands[0];
2119 if (REG_P (subtarget))
2120 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2122 subtarget = gen_reg_rtx (DImode), copyout = true;
2125 seq = gen_aligned_loadqi (subtarget, aligned_mem,
2128 seq = gen_aligned_loadhi (subtarget, aligned_mem,
2133 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2138 /* Don't pass these as parameters since that makes the generated
2139 code depend on parameter evaluation order which will cause
2140 bootstrap failures. */
2142 rtx temp1, temp2, subtarget, ua;
2145 temp1 = gen_reg_rtx (DImode);
2146 temp2 = gen_reg_rtx (DImode);
2148 subtarget = operands[0];
2149 if (REG_P (subtarget))
2150 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2152 subtarget = gen_reg_rtx (DImode), copyout = true;
2154 ua = get_unaligned_address (operands[1]);
2156 seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2);
2158 seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2);
2160 alpha_set_memflags (seq, operands[1]);
2164 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2169 if (any_memory_operand (operands[0], mode))
2171 if (aligned_memory_operand (operands[0], mode))
2173 rtx aligned_mem, bitnum;
2174 rtx temp1 = gen_reg_rtx (SImode);
2175 rtx temp2 = gen_reg_rtx (SImode);
2177 get_aligned_mem (operands[0], &aligned_mem, &bitnum);
2179 emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
2184 rtx temp1 = gen_reg_rtx (DImode);
2185 rtx temp2 = gen_reg_rtx (DImode);
2186 rtx temp3 = gen_reg_rtx (DImode);
2187 rtx ua = get_unaligned_address (operands[0]);
2190 seq = gen_unaligned_storeqi (ua, operands[1], temp1, temp2, temp3);
2192 seq = gen_unaligned_storehi (ua, operands[1], temp1, temp2, temp3);
2194 alpha_set_memflags (seq, operands[0]);
2203 /* Implement the movmisalign patterns. One of the operands is a memory
2204 that is not naturally aligned. Emit instructions to load it. */
2207 alpha_expand_movmisalign (enum machine_mode mode, rtx *operands)
2209 /* Honor misaligned loads, for those we promised to do so. */
2210 if (MEM_P (operands[1]))
2214 if (register_operand (operands[0], mode))
2217 tmp = gen_reg_rtx (mode);
2219 alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
2220 if (tmp != operands[0])
2221 emit_move_insn (operands[0], tmp);
2223 else if (MEM_P (operands[0]))
2225 if (!reg_or_0_operand (operands[1], mode))
2226 operands[1] = force_reg (mode, operands[1]);
2227 alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
2233 /* Generate an unsigned DImode to FP conversion. This is the same code
2234 optabs would emit if we didn't have TFmode patterns.
2236 For SFmode, this is the only construction I've found that can pass
2237 gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode
2238 intermediates will work, because you'll get intermediate rounding
2239 that ruins the end result. Some of this could be fixed by turning
2240 on round-to-positive-infinity, but that requires diddling the fpsr,
2241 which kills performance. I tried turning this around and converting
2242 to a negative number, so that I could turn on /m, but either I did
2243 it wrong or there's something else, because I wound up with the exact
2244 same single-bit error. There is a branch-less form of this same code:
2255 fcmoveq $f10,$f11,$f0
2257 I'm not using it because it's the same number of instructions as
2258 this branch-full form, and it has more serialized long latency
2259 instructions on the critical path.
2261 For DFmode, we can avoid rounding errors by breaking up the word
2262 into two pieces, converting them separately, and adding them back:
2264 LC0: .long 0,0x5f800000
2269 cpyse $f11,$f31,$f10
2270 cpyse $f31,$f11,$f11
2278 This doesn't seem to be a clear-cut win over the optabs form.
2279 It probably all depends on the distribution of numbers being
2280 converted -- in the optabs form, all but high-bit-set has a
2281 much lower minimum execution time. */
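/* A note on the negative path below: the value is halved before the
   conversion as i0 = (in >> 1) | (in & 1), converted, and then added to
   itself.  OR-ing the discarded low bit back in keeps it as a sticky
   bit, so the single rounding applied to the halved value matches what
   correct rounding of the original 64-bit value would give, and the
   final doubling is exact.  */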
2284 alpha_emit_floatuns (rtx operands[2])
2286 rtx neglab, donelab, i0, i1, f0, in, out;
2287 enum machine_mode mode;
2290 in = force_reg (DImode, operands[1]);
2291 mode = GET_MODE (out);
2292 neglab = gen_label_rtx ();
2293 donelab = gen_label_rtx ();
2294 i0 = gen_reg_rtx (DImode);
2295 i1 = gen_reg_rtx (DImode);
2296 f0 = gen_reg_rtx (mode);
2298 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
2300 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
2301 emit_jump_insn (gen_jump (donelab));
2304 emit_label (neglab);
2306 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
2307 emit_insn (gen_anddi3 (i1, in, const1_rtx));
2308 emit_insn (gen_iordi3 (i0, i0, i1));
2309 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
2310 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
2312 emit_label (donelab);
2315 /* Generate the comparison for a conditional branch. */
2318 alpha_emit_conditional_branch (rtx operands[], enum machine_mode cmp_mode)
2320 enum rtx_code cmp_code, branch_code;
2321 enum machine_mode branch_mode = VOIDmode;
2322 enum rtx_code code = GET_CODE (operands[0]);
2323 rtx op0 = operands[1], op1 = operands[2];
2326 if (cmp_mode == TFmode)
2328 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2333 /* The general case: fold the comparison code to the types of compares
2334 that we have, choosing the branch as necessary. */
2337 case EQ: case LE: case LT: case LEU: case LTU:
2339 /* We have these compares. */
2340 cmp_code = code, branch_code = NE;
2345 /* These must be reversed. */
2346 cmp_code = reverse_condition (code), branch_code = EQ;
2349 case GE: case GT: case GEU: case GTU:
2350 /* For FP, we swap them, for INT, we reverse them. */
2351 if (cmp_mode == DFmode)
2353 cmp_code = swap_condition (code);
2355 tem = op0, op0 = op1, op1 = tem;
2359 cmp_code = reverse_condition (code);
2368 if (cmp_mode == DFmode)
2370 if (flag_unsafe_math_optimizations && cmp_code != UNORDERED)
2372 /* When we are not as concerned about non-finite values, and we
2373 are comparing against zero, we can branch directly. */
2374 if (op1 == CONST0_RTX (DFmode))
2375 cmp_code = UNKNOWN, branch_code = code;
2376 else if (op0 == CONST0_RTX (DFmode))
2378 /* Undo the swap we probably did just above. */
2379 tem = op0, op0 = op1, op1 = tem;
2380 branch_code = swap_condition (cmp_code);
2386 /* ??? We mark the branch mode to be CCmode to prevent the
2387 compare and branch from being combined, since the compare
2388 insn follows IEEE rules that the branch does not. */
2389 branch_mode = CCmode;
2394 /* The following optimizations are only for signed compares. */
2395 if (code != LEU && code != LTU && code != GEU && code != GTU)
2397 /* Whee. Compare and branch against 0 directly. */
2398 if (op1 == const0_rtx)
2399 cmp_code = UNKNOWN, branch_code = code;
2401 /* If the constant doesn't fit into an immediate, but can
2402 be generated by lda/ldah, we adjust the argument and
2403 compare against zero, so we can use beq/bne directly. */
2404 /* ??? Don't do this when comparing against symbols, otherwise
2405 we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
2406 be declared false out of hand (at least for non-weak). */
2407 else if (CONST_INT_P (op1)
2408 && (code == EQ || code == NE)
2409 && !(symbolic_operand (op0, VOIDmode)
2410 || (REG_P (op0) && REG_POINTER (op0))))
2412 rtx n_op1 = GEN_INT (-INTVAL (op1));
2414 if (! satisfies_constraint_I (op1)
2415 && (satisfies_constraint_K (n_op1)
2416 || satisfies_constraint_L (n_op1)))
2417 cmp_code = PLUS, branch_code = code, op1 = n_op1;
2421 if (!reg_or_0_operand (op0, DImode))
2422 op0 = force_reg (DImode, op0);
2423 if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
2424 op1 = force_reg (DImode, op1);
2427 /* Emit an initial compare instruction, if necessary. */
2429 if (cmp_code != UNKNOWN)
2431 tem = gen_reg_rtx (cmp_mode);
2432 emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
2435 /* Emit the branch instruction. */
2436 tem = gen_rtx_SET (VOIDmode, pc_rtx,
2437 gen_rtx_IF_THEN_ELSE (VOIDmode,
2438 gen_rtx_fmt_ee (branch_code,
2440 CONST0_RTX (cmp_mode)),
2441 gen_rtx_LABEL_REF (VOIDmode,
2444 emit_jump_insn (tem);
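/* Illustrative sketch (hypothetical example): the constant-adjustment trick
   above at the source level.  x == C is treated as x + (-C) == 0 when C is
   not an 8-bit literal but -C is a single lda/ldah constant, so the adjusted
   value can feed beq/bne directly.  */
static long
branch_eq_const_model (long x, long a, long b)
{
  /* 0x1234 is not an 8-bit literal, but -0x1234 fits in lda, so this
     compiles to roughly "lda t,-4660(x); bne t,...".  */
  if (x == 0x1234)
    return a;
  return b;
}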
2447 /* Certain simplifications can be done to make invalid setcc operations
2448 valid. Return the final comparison, or NULL if we can't work. */
2451 alpha_emit_setcc (rtx operands[], enum machine_mode cmp_mode)
2453 enum rtx_code cmp_code;
2454 enum rtx_code code = GET_CODE (operands[1]);
2455 rtx op0 = operands[2], op1 = operands[3];
2458 if (cmp_mode == TFmode)
2460 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2465 if (cmp_mode == DFmode && !TARGET_FIX)
2468 /* The general case: fold the comparison code to the types of compares
2469 that we have, choosing the branch as necessary. */
2474 case EQ: case LE: case LT: case LEU: case LTU:
2476 /* We have these compares. */
2477 if (cmp_mode == DFmode)
2478 cmp_code = code, code = NE;
2482 if (cmp_mode == DImode && op1 == const0_rtx)
2487 cmp_code = reverse_condition (code);
2491 case GE: case GT: case GEU: case GTU:
2492 /* These normally need swapping, but for integer zero we have
2493 special patterns that recognize swapped operands. */
2494 if (cmp_mode == DImode && op1 == const0_rtx)
2496 code = swap_condition (code);
2497 if (cmp_mode == DFmode)
2498 cmp_code = code, code = NE;
2499 tmp = op0, op0 = op1, op1 = tmp;
2506 if (cmp_mode == DImode)
2508 if (!register_operand (op0, DImode))
2509 op0 = force_reg (DImode, op0);
2510 if (!reg_or_8bit_operand (op1, DImode))
2511 op1 = force_reg (DImode, op1);
2514 /* Emit an initial compare instruction, if necessary. */
2515 if (cmp_code != UNKNOWN)
2517 tmp = gen_reg_rtx (cmp_mode);
2518 emit_insn (gen_rtx_SET (VOIDmode, tmp,
2519 gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1)));
2521 op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp;
2525 /* Emit the setcc instruction. */
2526 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2527 gen_rtx_fmt_ee (code, DImode, op0, op1)));
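/* Illustrative sketch (hypothetical example): integer setcc needs no branch
   here; cmpeq/cmplt/cmple and their unsigned forms already leave 0 or 1 in a
   register, with swapped-operand patterns covering GT/GE against zero.  */
static long
setcc_model (long a, long b)
{
  return a < b;		/* roughly a single "cmplt a,b,res" */
}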
2532 /* Rewrite a comparison against zero CMP of the form
2533 (CODE (cc0) (const_int 0)) so it can be written validly in
2534 a conditional move (if_then_else CMP ...).
2535 If both of the operands that set cc0 are nonzero we must emit
2536 an insn to perform the compare (it can't be done within
2537 the conditional move). */
2540 alpha_emit_conditional_move (rtx cmp, enum machine_mode mode)
2542 enum rtx_code code = GET_CODE (cmp);
2543 enum rtx_code cmov_code = NE;
2544 rtx op0 = XEXP (cmp, 0);
2545 rtx op1 = XEXP (cmp, 1);
2546 enum machine_mode cmp_mode
2547 = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
2548 enum machine_mode cmov_mode = VOIDmode;
2549 int local_fast_math = flag_unsafe_math_optimizations;
2552 if (cmp_mode == TFmode)
2554 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2559 gcc_assert (cmp_mode == DFmode || cmp_mode == DImode);
2561 if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode))
2563 enum rtx_code cmp_code;
2568 /* If we have fp<->int register move instructions, do a cmov by
2569 performing the comparison in fp registers, and move the
2570 zero/nonzero value to integer registers, where we can then
2571 use a normal cmov, or vice-versa. */
2575 case EQ: case LE: case LT: case LEU: case LTU:
2577 /* We have these compares. */
2578 cmp_code = code, code = NE;
2583 /* These must be reversed. */
2584 cmp_code = reverse_condition (code), code = EQ;
2587 case GE: case GT: case GEU: case GTU:
2588 /* These normally need swapping, but for integer zero we have
2589 special patterns that recognize swapped operands. */
2590 if (cmp_mode == DImode && op1 == const0_rtx)
2591 cmp_code = code, code = NE;
2594 cmp_code = swap_condition (code);
2596 tem = op0, op0 = op1, op1 = tem;
2604 if (cmp_mode == DImode)
2606 if (!reg_or_0_operand (op0, DImode))
2607 op0 = force_reg (DImode, op0);
2608 if (!reg_or_8bit_operand (op1, DImode))
2609 op1 = force_reg (DImode, op1);
2612 tem = gen_reg_rtx (cmp_mode);
2613 emit_insn (gen_rtx_SET (VOIDmode, tem,
2614 gen_rtx_fmt_ee (cmp_code, cmp_mode,
2617 cmp_mode = cmp_mode == DImode ? DFmode : DImode;
2618 op0 = gen_lowpart (cmp_mode, tem);
2619 op1 = CONST0_RTX (cmp_mode);
2620 local_fast_math = 1;
2623 if (cmp_mode == DImode)
2625 if (!reg_or_0_operand (op0, DImode))
2626 op0 = force_reg (DImode, op0);
2627 if (!reg_or_8bit_operand (op1, DImode))
2628 op1 = force_reg (DImode, op1);
2631 /* We may be able to use a conditional move directly.
2632 This avoids emitting spurious compares. */
2633 if (signed_comparison_operator (cmp, VOIDmode)
2634 && (cmp_mode == DImode || local_fast_math)
2635 && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
2636 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2638 /* We can't put the comparison inside the conditional move;
2639 emit a compare instruction and put that inside the
2640 conditional move. Make sure we emit only comparisons we have;
2641 swap or reverse as necessary. */
2643 if (!can_create_pseudo_p ())
2648 case EQ: case LE: case LT: case LEU: case LTU:
2650 /* We have these compares: */
2655 /* These must be reversed. */
2656 code = reverse_condition (code);
2660 case GE: case GT: case GEU: case GTU:
2661 /* These normally need swapping, but for integer zero we have
2662 special patterns that recognize swapped operands. */
2663 if (cmp_mode == DImode && op1 == const0_rtx)
2665 code = swap_condition (code);
2666 tem = op0, op0 = op1, op1 = tem;
2673 if (cmp_mode == DImode)
2675 if (!reg_or_0_operand (op0, DImode))
2676 op0 = force_reg (DImode, op0);
2677 if (!reg_or_8bit_operand (op1, DImode))
2678 op1 = force_reg (DImode, op1);
2681 /* ??? We mark the branch mode to be CCmode to prevent the compare
2682 and cmov from being combined, since the compare insn follows IEEE
2683 rules that the cmov does not. */
2684 if (cmp_mode == DFmode && !local_fast_math)
2687 tem = gen_reg_rtx (cmp_mode);
2688 emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1));
2689 return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode));
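/* Illustrative sketch (hypothetical example, assuming TARGET_FIX-style
   fp<->int moves): the cross-mode cmov described above.  The FP compare
   result is materialized in an FP register, moved to an integer register,
   and a normal integer cmov then selects the result without a branch.  */
static long
cross_mode_cmov_model (double a, double b, long x, long y)
{
  /* Roughly: cmptlt; ftoit; cmovne.  */
  return a < b ? x : y;
}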
2692 /* Simplify a conditional move of two constants into a setcc with
2693 arithmetic. This is done with a splitter since combine would
2694 just undo the work if done during code generation. It also catches
2695 cases we wouldn't have before cse. */
2698 alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
2699 rtx t_rtx, rtx f_rtx)
2701 HOST_WIDE_INT t, f, diff;
2702 enum machine_mode mode;
2703 rtx target, subtarget, tmp;
2705 mode = GET_MODE (dest);
2710 if (((code == NE || code == EQ) && diff < 0)
2711 || (code == GE || code == GT))
2713 code = reverse_condition (code);
2714 diff = t, t = f, f = diff;
2718 subtarget = target = dest;
2721 target = gen_lowpart (DImode, dest);
2722 if (can_create_pseudo_p ())
2723 subtarget = gen_reg_rtx (DImode);
2727 /* Below, we must be careful to use copy_rtx on target and subtarget
2728 in intermediate insns, as they may be a subreg rtx, which may not be shared.  */
2731 if (f == 0 && exact_log2 (diff) > 0
2732 /* On EV6, we've got enough shifters to make non-arithmetic shifts
2733 viable over a longer latency cmove. On EV5, the E0 slot is a
2734 scarce resource, and on EV4 shift has the same latency as a cmove. */
2735 && (diff <= 8 || alpha_tune == PROCESSOR_EV6))
2737 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2738 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2740 tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2741 GEN_INT (exact_log2 (t)));
2742 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2744 else if (f == 0 && t == -1)
2746 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2747 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2749 emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
2751 else if (diff == 1 || diff == 4 || diff == 8)
2755 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2756 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2759 emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
2762 add_op = GEN_INT (f);
2763 if (sext_add_operand (add_op, mode))
2765 tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget),
2767 tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2768 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
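/* Illustrative sketch (hypothetical helper): the constant selections handled
   above, written as the arithmetic the splitter reduces them to, with c
   holding the 0/1 comparison result.  */
static long
cmov_const_models (long c, int which)
{
  switch (which)
    {
    case 0:
      return c << 3;		/* (c ? 8 : 0): f == 0, power-of-two t */
    case 1:
      return -c;		/* (c ? -1 : 0): f == 0, t == -1 */
    default:
      return c * 8 + 5;		/* (c ? 13 : 5): diff == 8, s8addq */
    }
}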
2780 /* Look up the function X_floating library function name for the
2783 struct GTY(()) xfloating_op
2785 const enum rtx_code code;
2786 const char *const GTY((skip)) osf_func;
2787 const char *const GTY((skip)) vms_func;
2791 static GTY(()) struct xfloating_op xfloating_ops[] =
2793 { PLUS, "_OtsAddX", "OTS$ADD_X", 0 },
2794 { MINUS, "_OtsSubX", "OTS$SUB_X", 0 },
2795 { MULT, "_OtsMulX", "OTS$MUL_X", 0 },
2796 { DIV, "_OtsDivX", "OTS$DIV_X", 0 },
2797 { EQ, "_OtsEqlX", "OTS$EQL_X", 0 },
2798 { NE, "_OtsNeqX", "OTS$NEQ_X", 0 },
2799 { LT, "_OtsLssX", "OTS$LSS_X", 0 },
2800 { LE, "_OtsLeqX", "OTS$LEQ_X", 0 },
2801 { GT, "_OtsGtrX", "OTS$GTR_X", 0 },
2802 { GE, "_OtsGeqX", "OTS$GEQ_X", 0 },
2803 { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 },
2804 { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 },
2805 { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 },
2806 { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 },
2807 { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 }
2810 static GTY(()) struct xfloating_op vax_cvt_ops[] =
2812 { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 },
2813 { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 }
2817 alpha_lookup_xfloating_lib_func (enum rtx_code code)
2819 struct xfloating_op *ops = xfloating_ops;
2820 long n = ARRAY_SIZE (xfloating_ops);
2823 gcc_assert (TARGET_HAS_XFLOATING_LIBS);
2825 /* How irritating. Nothing to key off for the main table. */
2826 if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE))
2829 n = ARRAY_SIZE (vax_cvt_ops);
2832 for (i = 0; i < n; ++i, ++ops)
2833 if (ops->code == code)
2835 rtx func = ops->libcall;
2838 func = init_one_libfunc (TARGET_ABI_OPEN_VMS
2839 ? ops->vms_func : ops->osf_func);
2840 ops->libcall = func;
2848 /* Most X_floating operations take the rounding mode as an argument.
2849 Compute that here. */
2852 alpha_compute_xfloating_mode_arg (enum rtx_code code,
2853 enum alpha_fp_rounding_mode round)
2859 case ALPHA_FPRM_NORM:
2862 case ALPHA_FPRM_MINF:
2865 case ALPHA_FPRM_CHOP:
2868 case ALPHA_FPRM_DYN:
2874 /* XXX For reference, round to +inf is mode = 3. */
2877 if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
2883 /* Emit an X_floating library function call.
2885 Note that these functions do not follow normal calling conventions:
2886 TFmode arguments are passed in two integer registers (as opposed to
2887 indirect); TFmode return values appear in R16+R17.
2889 FUNC is the function to call.
2890 TARGET is where the output belongs.
2891 OPERANDS are the inputs.
2892 NOPERANDS is the count of inputs.
2893 EQUIV is the expression equivalent for the function.
2897 alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
2898 int noperands, rtx equiv)
2900 rtx usage = NULL_RTX, tmp, reg;
2905 for (i = 0; i < noperands; ++i)
2907 switch (GET_MODE (operands[i]))
2910 reg = gen_rtx_REG (TFmode, regno);
2915 reg = gen_rtx_REG (DFmode, regno + 32);
2920 gcc_assert (CONST_INT_P (operands[i]));
2923 reg = gen_rtx_REG (DImode, regno);
2931 emit_move_insn (reg, operands[i]);
2932 use_reg (&usage, reg);
2935 switch (GET_MODE (target))
2938 reg = gen_rtx_REG (TFmode, 16);
2941 reg = gen_rtx_REG (DFmode, 32);
2944 reg = gen_rtx_REG (DImode, 0);
2950 tmp = gen_rtx_MEM (QImode, func);
2951 tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx,
2952 const0_rtx, const0_rtx));
2953 CALL_INSN_FUNCTION_USAGE (tmp) = usage;
2954 RTL_CONST_CALL_P (tmp) = 1;
2959 emit_libcall_block (tmp, target, reg, equiv);
2962 /* Emit an X_floating library function call for arithmetic (+,-,*,/). */
2965 alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[])
2969 rtx out_operands[3];
2971 func = alpha_lookup_xfloating_lib_func (code);
2972 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
2974 out_operands[0] = operands[1];
2975 out_operands[1] = operands[2];
2976 out_operands[2] = GEN_INT (mode);
2977 alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
2978 gen_rtx_fmt_ee (code, TFmode, operands[1],
2982 /* Emit an X_floating library function call for a comparison. */
2985 alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
2987 enum rtx_code cmp_code, res_code;
2988 rtx func, out, operands[2], note;
2990 /* X_floating library comparison functions return -1 for unordered,
2994 0 for false, and 1 for true; convert the compare against that raw return value. */
3022 func = alpha_lookup_xfloating_lib_func (cmp_code);
3026 out = gen_reg_rtx (DImode);
3028 /* What's actually returned is -1,0,1, not a proper boolean value. */
3029 note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1);
3030 note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE);
3031 alpha_emit_xfloating_libcall (func, out, operands, 2, note);
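/* Illustrative sketch (hypothetical helper): the raw -1/0/1 library result
   is not a boolean, so an ordered compare such as '<' tests it with "> 0",
   which leaves the unordered (-1) case false.  */
static int
xfloat_compare_model (long raw)
{
  return raw > 0;
}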
3036 /* Emit an X_floating library function call for a conversion. */
3039 alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[])
3041 int noperands = 1, mode;
3042 rtx out_operands[2];
3044 enum rtx_code code = orig_code;
3046 if (code == UNSIGNED_FIX)
3049 func = alpha_lookup_xfloating_lib_func (code);
3051 out_operands[0] = operands[1];
3056 mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
3057 out_operands[1] = GEN_INT (mode);
3060 case FLOAT_TRUNCATE:
3061 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3062 out_operands[1] = GEN_INT (mode);
3069 alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
3070 gen_rtx_fmt_e (orig_code,
3071 GET_MODE (operands[0]),
3075 /* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of
3076 DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true,
3077 guarantee that the sequence (set OP[0] OP[2]) (set OP[1] OP[3])
3080 is valid. Naturally, output operand ordering is little-endian.
3081 This is used by *movtf_internal and *movti_internal. */
3084 alpha_split_tmode_pair (rtx operands[4], enum machine_mode mode,
3087 switch (GET_CODE (operands[1]))
3090 operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
3091 operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
3095 operands[3] = adjust_address (operands[1], DImode, 8);
3096 operands[2] = adjust_address (operands[1], DImode, 0);
3101 gcc_assert (operands[1] == CONST0_RTX (mode));
3102 operands[2] = operands[3] = const0_rtx;
3109 switch (GET_CODE (operands[0]))
3112 operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
3113 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
3117 operands[1] = adjust_address (operands[0], DImode, 8);
3118 operands[0] = adjust_address (operands[0], DImode, 0);
3125 if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3]))
3128 tmp = operands[0], operands[0] = operands[1], operands[1] = tmp;
3129 tmp = operands[2], operands[2] = operands[3], operands[3] = tmp;
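/* Illustrative sketch (hypothetical helper): the FIXUP_OVERLAP case at the
   data level.  When the low destination word is the same register as the
   high source word, the high half must be moved first or it would be
   clobbered by the first move.  */
static void
split_move_model (unsigned long *d_lo, unsigned long *d_hi,
		  const unsigned long *s_lo, const unsigned long *s_hi)
{
  if (d_lo == s_hi)
    {
      *d_hi = *s_hi;	/* high half first (the swapped ordering above) */
      *d_lo = *s_lo;
    }
  else
    {
      *d_lo = *s_lo;
      *d_hi = *s_hi;
    }
}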
3133 /* Implement negtf2 or abstf2. Op0 is destination, op1 is source,
3134 op2 is a register containing the sign bit, operation is the
3135 logical operation to be performed. */
3138 alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx))
3140 rtx high_bit = operands[2];
3144 alpha_split_tmode_pair (operands, TFmode, false);
3146 /* Detect three flavors of operand overlap. */
3148 if (rtx_equal_p (operands[0], operands[2]))
3150 else if (rtx_equal_p (operands[1], operands[2]))
3152 if (rtx_equal_p (operands[0], high_bit))
3159 emit_move_insn (operands[0], operands[2]);
3161 /* ??? If the destination overlaps both source tf and high_bit, then
3162 assume source tf is dead in its entirety and use the other half
3163 for a scratch register. Otherwise "scratch" is just the proper
3164 destination register. */
3165 scratch = operands[move < 2 ? 1 : 3];
3167 emit_insn ((*operation) (scratch, high_bit, operands[3]));
3171 emit_move_insn (operands[0], operands[2]);
3173 emit_move_insn (operands[1], scratch);
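/* Illustrative sketch (hypothetical helpers): what the sign-bit frob amounts
   to on the two DImode halves of a little-endian TFmode value (64-bit
   unsigned long assumed).  Only the high half is touched.  */
static void
negtf_model (unsigned long q[2])
{
  q[1] ^= 1UL << 63;		/* negtf2: flip the sign bit */
}

static void
abstf_model (unsigned long q[2])
{
  q[1] &= ~(1UL << 63);		/* abstf2: clear the sign bit */
}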
3177 /* Use ext[wlq][lh] as the Architecture Handbook describes for extracting unaligned data:
3181 word: ldq_u r1,X(r11) ldq_u r1,X(r11)
3182 ldq_u r2,X+1(r11) ldq_u r2,X+1(r11)
3183 lda r3,X(r11) lda r3,X+2(r11)
3184 extwl r1,r3,r1 extql r1,r3,r1
3185 extwh r2,r3,r2 extqh r2,r3,r2
3186 or r1,r2,r1 or r1,r2,r1
3189 long: ldq_u r1,X(r11) ldq_u r1,X(r11)
3190 ldq_u r2,X+3(r11) ldq_u r2,X+3(r11)
3191 lda r3,X(r11) lda r3,X(r11)
3192 extll r1,r3,r1 extll r1,r3,r1
3193 extlh r2,r3,r2 extlh r2,r3,r2
3194 or r1,r2,r1 addl r1,r2,r1
3196 quad: ldq_u r1,X(r11)
3205 alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
3206 HOST_WIDE_INT ofs, int sign)
3208 rtx meml, memh, addr, extl, exth, tmp, mema;
3209 enum machine_mode mode;
3211 if (TARGET_BWX && size == 2)
3213 meml = adjust_address (mem, QImode, ofs);
3214 memh = adjust_address (mem, QImode, ofs+1);
3215 extl = gen_reg_rtx (DImode);
3216 exth = gen_reg_rtx (DImode);
3217 emit_insn (gen_zero_extendqidi2 (extl, meml));
3218 emit_insn (gen_zero_extendqidi2 (exth, memh));
3219 exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
3220 NULL, 1, OPTAB_LIB_WIDEN);
3221 addr = expand_simple_binop (DImode, IOR, extl, exth,
3222 NULL, 1, OPTAB_LIB_WIDEN);
3224 if (sign && GET_MODE (tgt) != HImode)
3226 addr = gen_lowpart (HImode, addr);
3227 emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
3231 if (GET_MODE (tgt) != DImode)
3232 addr = gen_lowpart (GET_MODE (tgt), addr);
3233 emit_move_insn (tgt, addr);
3238 meml = gen_reg_rtx (DImode);
3239 memh = gen_reg_rtx (DImode);
3240 addr = gen_reg_rtx (DImode);
3241 extl = gen_reg_rtx (DImode);
3242 exth = gen_reg_rtx (DImode);
3244 mema = XEXP (mem, 0);
3245 if (GET_CODE (mema) == LO_SUM)
3246 mema = force_reg (Pmode, mema);
3248 /* AND addresses cannot be in any alias set, since they may implicitly
3249 alias surrounding code. Ideally we'd have some alias set that
3250 covered all types except those with alignment 8 or higher. */
3252 tmp = change_address (mem, DImode,
3253 gen_rtx_AND (DImode,
3254 plus_constant (mema, ofs),
3256 set_mem_alias_set (tmp, 0);
3257 emit_move_insn (meml, tmp);
3259 tmp = change_address (mem, DImode,
3260 gen_rtx_AND (DImode,
3261 plus_constant (mema, ofs + size - 1),
3263 set_mem_alias_set (tmp, 0);
3264 emit_move_insn (memh, tmp);
3266 if (sign && size == 2)
3268 emit_move_insn (addr, plus_constant (mema, ofs+2));
3270 emit_insn (gen_extql (extl, meml, addr));
3271 emit_insn (gen_extqh (exth, memh, addr));
3273 /* We must use tgt here for the target. The Alpha/VMS port fails if we use
3274 addr for the target, because addr is marked as a pointer and combine
3275 knows that pointers are always sign-extended 32-bit values.  */
3276 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3277 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
3278 addr, 1, OPTAB_WIDEN);
3282 emit_move_insn (addr, plus_constant (mema, ofs));
3283 emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr));
3287 emit_insn (gen_extwh (exth, memh, addr));
3291 emit_insn (gen_extlh (exth, memh, addr));
3295 emit_insn (gen_extqh (exth, memh, addr));
3302 addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
3303 gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
3308 emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr));
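/* Illustrative sketch (hypothetical helper): the ldq_u/extll/extlh/or
   sequence for an unaligned 32-bit load, modelled with aligned 64-bit reads.
   Like the real sequence it touches the two quadwords that contain the data,
   so treat it as a model of the generated code, not as portable C.  */
static unsigned int
unaligned_ldl_model (const unsigned char *p)
{
  const unsigned long *lo
    = (const unsigned long *) ((unsigned long) p & -8UL);
  const unsigned long *hi
    = (const unsigned long *) (((unsigned long) p + 3) & -8UL);
  unsigned long shift = ((unsigned long) p & 7) * 8;
  unsigned long l = *lo >> shift;			/* extll */
  unsigned long h = shift ? *hi << (64 - shift) : 0;	/* extlh */
  return (unsigned int) (l | h);			/* or / addl */
}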
3311 /* Similarly, use ins and msk instructions to perform unaligned stores. */
3314 alpha_expand_unaligned_store (rtx dst, rtx src,
3315 HOST_WIDE_INT size, HOST_WIDE_INT ofs)
3317 rtx dstl, dsth, addr, insl, insh, meml, memh, dsta;
3319 if (TARGET_BWX && size == 2)
3321 if (src != const0_rtx)
3323 dstl = gen_lowpart (QImode, src);
3324 dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8),
3325 NULL, 1, OPTAB_LIB_WIDEN);
3326 dsth = gen_lowpart (QImode, dsth);
3329 dstl = dsth = const0_rtx;
3331 meml = adjust_address (dst, QImode, ofs);
3332 memh = adjust_address (dst, QImode, ofs+1);
3334 emit_move_insn (meml, dstl);
3335 emit_move_insn (memh, dsth);
3339 dstl = gen_reg_rtx (DImode);
3340 dsth = gen_reg_rtx (DImode);
3341 insl = gen_reg_rtx (DImode);
3342 insh = gen_reg_rtx (DImode);
3344 dsta = XEXP (dst, 0);
3345 if (GET_CODE (dsta) == LO_SUM)
3346 dsta = force_reg (Pmode, dsta);
3348 /* AND addresses cannot be in any alias set, since they may implicitly
3349 alias surrounding code. Ideally we'd have some alias set that
3350 covered all types except those with alignment 8 or higher. */
3352 meml = change_address (dst, DImode,
3353 gen_rtx_AND (DImode,
3354 plus_constant (dsta, ofs),
3356 set_mem_alias_set (meml, 0);
3358 memh = change_address (dst, DImode,
3359 gen_rtx_AND (DImode,
3360 plus_constant (dsta, ofs + size - 1),
3362 set_mem_alias_set (memh, 0);
3364 emit_move_insn (dsth, memh);
3365 emit_move_insn (dstl, meml);
3367 addr = copy_addr_to_reg (plus_constant (dsta, ofs));
3369 if (src != CONST0_RTX (GET_MODE (src)))
3371 emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
3372 GEN_INT (size*8), addr));
3377 emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr));
3380 emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr));
3383 emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr));
3390 emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
3395 emit_insn (gen_mskwl (dstl, dstl, addr));
3398 emit_insn (gen_mskll (dstl, dstl, addr));
3401 emit_insn (gen_mskql (dstl, dstl, addr));
3407 if (src != CONST0_RTX (GET_MODE (src)))
3409 dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
3410 dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
3413 /* Must store high before low, for the degenerate case of an aligned address. */
3414 emit_move_insn (memh, dsth);
3415 emit_move_insn (meml, dstl);
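/* Illustrative sketch (hypothetical helper): the read-merge-write that the
   ins/msk sequence performs for an unaligned 16-bit store, again modelled
   with aligned 64-bit accesses rather than portable C.  */
static void
unaligned_stw_model (unsigned char *p, unsigned short val)
{
  unsigned long *lo = (unsigned long *) ((unsigned long) p & -8UL);
  unsigned long *hi = (unsigned long *) (((unsigned long) p + 1) & -8UL);
  unsigned long shift = ((unsigned long) p & 7) * 8;
  unsigned long insl = (unsigned long) val << shift;	/* inswl */
  unsigned long insh
    = shift ? (unsigned long) val >> (64 - shift) : 0;	/* inswh */
  unsigned long mskl = ~(0xffffUL << shift);		/* mskwl */
  unsigned long mskh
    = shift ? ~(0xffffUL >> (64 - shift)) : ~0UL;	/* mskwh */
  unsigned long dh = (*hi & mskh) | insh;
  unsigned long dl = (*lo & mskl) | insl;
  /* Store high before low, so that when lo == hi (the aligned case) the
     fully merged low word is what finally lands in memory.  */
  *hi = dh;
  *lo = dl;
}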
3418 /* The block move code tries to maximize speed by separating loads and
3419 stores at the expense of register pressure: we load all of the data
3420 before we store it back out. Two secondary effects are worth
3421 mentioning: this speeds copying between aligned and unaligned
3422 buffers, and it makes the code significantly easier to write. */
3424 #define MAX_MOVE_WORDS 8
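/* Illustrative sketch (hypothetical helper): the shape of the block move
   expanders below -- all loads are issued before any stores, for at most
   MAX_MOVE_WORDS quadwords at a time.  */
static void
block_move_model (unsigned long *dst, const unsigned long *src, int words)
{
  unsigned long regs[MAX_MOVE_WORDS];
  int i;
  gcc_assert (words <= MAX_MOVE_WORDS);
  for (i = 0; i < words; ++i)
    regs[i] = src[i];		/* load everything first */
  for (i = 0; i < words; ++i)
    dst[i] = regs[i];		/* then store it back out */
}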
3426 /* Load an integral number of consecutive unaligned quadwords. */
3429 alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem,
3430 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3432 rtx const im8 = GEN_INT (-8);
3433 rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
3434 rtx sreg, areg, tmp, smema;
3437 smema = XEXP (smem, 0);
3438 if (GET_CODE (smema) == LO_SUM)
3439 smema = force_reg (Pmode, smema);
3441 /* Generate all the tmp registers we need. */
3442 for (i = 0; i < words; ++i)
3444 data_regs[i] = out_regs[i];
3445 ext_tmps[i] = gen_reg_rtx (DImode);
3447 data_regs[words] = gen_reg_rtx (DImode);
3450 smem = adjust_address (smem, GET_MODE (smem), ofs);
3452 /* Load up all of the source data. */
3453 for (i = 0; i < words; ++i)
3455 tmp = change_address (smem, DImode,
3456 gen_rtx_AND (DImode,
3457 plus_constant (smema, 8*i),
3459 set_mem_alias_set (tmp, 0);
3460 emit_move_insn (data_regs[i], tmp);
3463 tmp = change_address (smem, DImode,
3464 gen_rtx_AND (DImode,