1 /* Subroutines used for code generation on the DEC Alpha.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
5 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
26 #include "coretypes.h"
31 #include "hard-reg-set.h"
33 #include "insn-config.h"
34 #include "conditions.h"
36 #include "insn-attr.h"
47 #include "integrate.h"
50 #include "target-def.h"
52 #include "langhooks.h"
53 #include <splay-tree.h>
54 #include "cfglayout.h"
56 #include "tree-flow.h"
57 #include "tree-stdarg.h"
58 #include "tm-constrs.h"
61 /* Specify which cpu to schedule for. */
62 enum processor_type alpha_tune;
64 /* Which cpu we're generating code for. */
65 enum processor_type alpha_cpu;
67 static const char * const alpha_cpu_name[] =
72 /* Specify how accurate floating-point traps need to be. */
74 enum alpha_trap_precision alpha_tp;
76 /* Specify the floating-point rounding mode. */
78 enum alpha_fp_rounding_mode alpha_fprm;
80 /* Specify which things cause traps. */
82 enum alpha_fp_trap_mode alpha_fptm;
84 /* Nonzero if inside of a function, because the Alpha asm can't
85 handle .files inside of functions. */
87 static int inside_function = FALSE;
89 /* The number of cycles of latency we should assume on memory reads. */
91 int alpha_memory_latency = 3;
93 /* Whether the function needs the GP. */
95 static int alpha_function_needs_gp;
97 /* The alias set for prologue/epilogue register save/restore. */
99 static GTY(()) alias_set_type alpha_sr_alias_set;
101 /* The assembler name of the current function. */
103 static const char *alpha_fnname;
105 /* The next explicit relocation sequence number. */
106 extern GTY(()) int alpha_next_sequence_number;
107 int alpha_next_sequence_number = 1;
109 /* The literal and gpdisp sequence numbers for this insn, as printed
110 by %# and %* respectively. */
111 extern GTY(()) int alpha_this_literal_sequence_number;
112 extern GTY(()) int alpha_this_gpdisp_sequence_number;
113 int alpha_this_literal_sequence_number;
114 int alpha_this_gpdisp_sequence_number;
116 /* Costs of various operations on the different architectures. */
118 struct alpha_rtx_cost_data
120 unsigned char fp_add;
121 unsigned char fp_mult;
122 unsigned char fp_div_sf;
123 unsigned char fp_div_df;
124 unsigned char int_mult_si;
125 unsigned char int_mult_di;
126 unsigned char int_shift;
127 unsigned char int_cmov;
128 unsigned short int_div;
131 static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
134 COSTS_N_INSNS (6), /* fp_add */
135 COSTS_N_INSNS (6), /* fp_mult */
136 COSTS_N_INSNS (34), /* fp_div_sf */
137 COSTS_N_INSNS (63), /* fp_div_df */
138 COSTS_N_INSNS (23), /* int_mult_si */
139 COSTS_N_INSNS (23), /* int_mult_di */
140 COSTS_N_INSNS (2), /* int_shift */
141 COSTS_N_INSNS (2), /* int_cmov */
142 COSTS_N_INSNS (97), /* int_div */
145 COSTS_N_INSNS (4), /* fp_add */
146 COSTS_N_INSNS (4), /* fp_mult */
147 COSTS_N_INSNS (15), /* fp_div_sf */
148 COSTS_N_INSNS (22), /* fp_div_df */
149 COSTS_N_INSNS (8), /* int_mult_si */
150 COSTS_N_INSNS (12), /* int_mult_di */
151 COSTS_N_INSNS (1) + 1, /* int_shift */
152 COSTS_N_INSNS (1), /* int_cmov */
153 COSTS_N_INSNS (83), /* int_div */
156 COSTS_N_INSNS (4), /* fp_add */
157 COSTS_N_INSNS (4), /* fp_mult */
158 COSTS_N_INSNS (12), /* fp_div_sf */
159 COSTS_N_INSNS (15), /* fp_div_df */
160 COSTS_N_INSNS (7), /* int_mult_si */
161 COSTS_N_INSNS (7), /* int_mult_di */
162 COSTS_N_INSNS (1), /* int_shift */
163 COSTS_N_INSNS (2), /* int_cmov */
164 COSTS_N_INSNS (86), /* int_div */
168 /* Similar but tuned for code size instead of execution latency. The
169 extra +N is fractional cost tuning based on latency. It's used to
170 encourage use of cheaper insns like shift, but only if there's just one of them.  */
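/* For example, in the size table below int_mult_di is COSTS_N_INSNS (1) + 2
   while int_shift is COSTS_N_INSNS (1), so even when optimizing for size a
   multiply still looks slightly worse than a single shift.  */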
173 static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
175 COSTS_N_INSNS (1), /* fp_add */
176 COSTS_N_INSNS (1), /* fp_mult */
177 COSTS_N_INSNS (1), /* fp_div_sf */
178 COSTS_N_INSNS (1) + 1, /* fp_div_df */
179 COSTS_N_INSNS (1) + 1, /* int_mult_si */
180 COSTS_N_INSNS (1) + 2, /* int_mult_di */
181 COSTS_N_INSNS (1), /* int_shift */
182 COSTS_N_INSNS (1), /* int_cmov */
183 COSTS_N_INSNS (6), /* int_div */
186 /* Get the number of args of a function in one of two ways. */
187 #if TARGET_ABI_OPEN_VMS || TARGET_ABI_UNICOSMK
188 #define NUM_ARGS crtl->args.info.num_args
#else
190 #define NUM_ARGS crtl->args.info
#endif
196 /* Declarations of static functions. */
197 static struct machine_function *alpha_init_machine_status (void);
198 static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
200 #if TARGET_ABI_OPEN_VMS
201 static void alpha_write_linkage (FILE *, const char *, tree);
204 static void unicosmk_output_deferred_case_vectors (FILE *);
205 static void unicosmk_gen_dsib (unsigned long *);
206 static void unicosmk_output_ssib (FILE *, const char *);
207 static int unicosmk_need_dex (rtx);
209 /* Implement TARGET_HANDLE_OPTION. */
212 alpha_handle_option (size_t code, const char *arg, int value)
218 target_flags |= MASK_SOFT_FP;
222 case OPT_mieee_with_inexact:
223 target_flags |= MASK_IEEE_CONFORMANT;
227 if (value != 16 && value != 32 && value != 64)
228 error ("bad value %qs for -mtls-size switch", arg);
235 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
236 /* Implement TARGET_MANGLE_TYPE. */
239 alpha_mangle_type (const_tree type)
241 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
242 && TARGET_LONG_DOUBLE_128)
245 /* For all other types, use normal C++ mangling. */
250 /* Parse target option strings. */
253 override_options (void)
255 static const struct cpu_table {
256 const char *const name;
257 const enum processor_type processor;
260 { "ev4", PROCESSOR_EV4, 0 },
261 { "ev45", PROCESSOR_EV4, 0 },
262 { "21064", PROCESSOR_EV4, 0 },
263 { "ev5", PROCESSOR_EV5, 0 },
264 { "21164", PROCESSOR_EV5, 0 },
265 { "ev56", PROCESSOR_EV5, MASK_BWX },
266 { "21164a", PROCESSOR_EV5, MASK_BWX },
267 { "pca56", PROCESSOR_EV5, MASK_BWX|MASK_MAX },
268 { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX },
269 { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX },
270 { "ev6", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX },
271 { "21264", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX },
272 { "ev67", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX },
273 { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX }
276 int const ct_size = ARRAY_SIZE (cpu_table);
279 /* Unicos/Mk doesn't have shared libraries. */
280 if (TARGET_ABI_UNICOSMK && flag_pic)
282 warning (0, "-f%s ignored for Unicos/Mk (not supported)",
283 (flag_pic > 1) ? "PIC" : "pic");
287 /* On Unicos/Mk, the native compiler consistently generates /d suffixes for
288 floating-point instructions. Make that the default for this target. */
289 if (TARGET_ABI_UNICOSMK)
290 alpha_fprm = ALPHA_FPRM_DYN;
292 alpha_fprm = ALPHA_FPRM_NORM;
294 alpha_tp = ALPHA_TP_PROG;
295 alpha_fptm = ALPHA_FPTM_N;
297 /* We cannot use su and sui qualifiers for conversion instructions on
298 Unicos/Mk. I'm not sure if this is due to assembler or hardware
299 limitations. Right now, we issue a warning if -mieee is specified
300 and then ignore it; eventually, we should either get it right or
301 disable the option altogether. */
305 if (TARGET_ABI_UNICOSMK)
306 warning (0, "-mieee not supported on Unicos/Mk");
309 alpha_tp = ALPHA_TP_INSN;
310 alpha_fptm = ALPHA_FPTM_SU;
314 if (TARGET_IEEE_WITH_INEXACT)
316 if (TARGET_ABI_UNICOSMK)
317 warning (0, "-mieee-with-inexact not supported on Unicos/Mk");
320 alpha_tp = ALPHA_TP_INSN;
321 alpha_fptm = ALPHA_FPTM_SUI;
327 if (! strcmp (alpha_tp_string, "p"))
328 alpha_tp = ALPHA_TP_PROG;
329 else if (! strcmp (alpha_tp_string, "f"))
330 alpha_tp = ALPHA_TP_FUNC;
331 else if (! strcmp (alpha_tp_string, "i"))
332 alpha_tp = ALPHA_TP_INSN;
334 error ("bad value %qs for -mtrap-precision switch", alpha_tp_string);
337 if (alpha_fprm_string)
339 if (! strcmp (alpha_fprm_string, "n"))
340 alpha_fprm = ALPHA_FPRM_NORM;
341 else if (! strcmp (alpha_fprm_string, "m"))
342 alpha_fprm = ALPHA_FPRM_MINF;
343 else if (! strcmp (alpha_fprm_string, "c"))
344 alpha_fprm = ALPHA_FPRM_CHOP;
345 else if (! strcmp (alpha_fprm_string,"d"))
346 alpha_fprm = ALPHA_FPRM_DYN;
348 error ("bad value %qs for -mfp-rounding-mode switch",
352 if (alpha_fptm_string)
354 if (strcmp (alpha_fptm_string, "n") == 0)
355 alpha_fptm = ALPHA_FPTM_N;
356 else if (strcmp (alpha_fptm_string, "u") == 0)
357 alpha_fptm = ALPHA_FPTM_U;
358 else if (strcmp (alpha_fptm_string, "su") == 0)
359 alpha_fptm = ALPHA_FPTM_SU;
360 else if (strcmp (alpha_fptm_string, "sui") == 0)
361 alpha_fptm = ALPHA_FPTM_SUI;
363 error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string);
366 if (alpha_cpu_string)
368 for (i = 0; i < ct_size; i++)
369 if (! strcmp (alpha_cpu_string, cpu_table [i].name))
371 alpha_tune = alpha_cpu = cpu_table [i].processor;
372 target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
373 target_flags |= cpu_table [i].flags;
377 error ("bad value %qs for -mcpu switch", alpha_cpu_string);
380 if (alpha_tune_string)
382 for (i = 0; i < ct_size; i++)
383 if (! strcmp (alpha_tune_string, cpu_table [i].name))
385 alpha_tune = cpu_table [i].processor;
389 error ("bad value %qs for -mcpu switch", alpha_tune_string);
392 /* Do some sanity checks on the above options. */
394 if (TARGET_ABI_UNICOSMK && alpha_fptm != ALPHA_FPTM_N)
396 warning (0, "trap mode not supported on Unicos/Mk");
397 alpha_fptm = ALPHA_FPTM_N;
400 if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
401 && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
403 warning (0, "fp software completion requires -mtrap-precision=i");
404 alpha_tp = ALPHA_TP_INSN;
407 if (alpha_cpu == PROCESSOR_EV6)
409 /* Except for EV6 pass 1 (not released), we always have precise
410 arithmetic traps, which means we can do software completion
411 without minding trap shadows. */
412 alpha_tp = ALPHA_TP_PROG;
415 if (TARGET_FLOAT_VAX)
417 if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
419 warning (0, "rounding mode not supported for VAX floats");
420 alpha_fprm = ALPHA_FPRM_NORM;
422 if (alpha_fptm == ALPHA_FPTM_SUI)
424 warning (0, "trap mode not supported for VAX floats");
425 alpha_fptm = ALPHA_FPTM_SU;
427 if (target_flags_explicit & MASK_LONG_DOUBLE_128)
428 warning (0, "128-bit long double not supported for VAX floats");
429 target_flags &= ~MASK_LONG_DOUBLE_128;
436 if (!alpha_mlat_string)
437 alpha_mlat_string = "L1";
439 if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
440 && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
442 else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
443 && ISDIGIT ((unsigned char)alpha_mlat_string[1])
444 && alpha_mlat_string[2] == '\0')
446 static int const cache_latency[][4] =
448 { 3, 30, -1 }, /* ev4 -- Bcache is a guess */
449 { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
450 { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */
453 lat = alpha_mlat_string[1] - '0';
454 if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
456 warning (0, "L%d cache latency unknown for %s",
457 lat, alpha_cpu_name[alpha_tune]);
461 lat = cache_latency[alpha_tune][lat-1];
463 else if (! strcmp (alpha_mlat_string, "main"))
465 /* Most current memories have about 370ns latency. This is
466 a reasonable guess for a fast cpu. */
471 warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string);
475 alpha_memory_latency = lat;
478 /* Default the definition of "small data" to 8 bytes. */
482 /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */
484 target_flags |= MASK_SMALL_DATA;
485 else if (flag_pic == 2)
486 target_flags &= ~MASK_SMALL_DATA;
488 /* Align labels and loops for optimal branching. */
489 /* ??? Kludge these by not doing anything if we don't optimize and also if
490 we are writing ECOFF symbols to work around a bug in DEC's assembler. */
491 if (optimize > 0 && write_symbols != SDB_DEBUG)
493 if (align_loops <= 0)
495 if (align_jumps <= 0)
498 if (align_functions <= 0)
499 align_functions = 16;
501 /* Acquire a unique set number for our register saves and restores. */
502 alpha_sr_alias_set = new_alias_set ();
504 /* Register variables and functions with the garbage collector. */
506 /* Set up function hooks. */
507 init_machine_status = alpha_init_machine_status;
509 /* Tell the compiler when we're using VAX floating point. */
510 if (TARGET_FLOAT_VAX)
512 REAL_MODE_FORMAT (SFmode) = &vax_f_format;
513 REAL_MODE_FORMAT (DFmode) = &vax_g_format;
514 REAL_MODE_FORMAT (TFmode) = NULL;
517 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
518 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
519 target_flags |= MASK_LONG_DOUBLE_128;
522 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
523 can be optimized to ap = __builtin_next_arg (0). */
524 if (TARGET_ABI_UNICOSMK)
525 targetm.expand_builtin_va_start = NULL;
528 /* Returns 1 if VALUE is a mask that contains full bytes of zeros or ones. */
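/* For example, 0x00ffff00ff0000ff is such a mask (every byte is 0x00 or
   0xff), while 0x00000000ff008001 is not, since the 0x80 and 0x01 bytes are
   only partially set.  */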
531 zap_mask (HOST_WIDE_INT value)
535 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
537 if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
543 /* Return true if OP is valid for a particular TLS relocation.
544 We are already guaranteed that OP is a CONST. */
547 tls_symbolic_operand_1 (rtx op, int size, int unspec)
551 if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
553 op = XVECEXP (op, 0, 0);
555 if (GET_CODE (op) != SYMBOL_REF)
558 switch (SYMBOL_REF_TLS_MODEL (op))
560 case TLS_MODEL_LOCAL_DYNAMIC:
561 return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
562 case TLS_MODEL_INITIAL_EXEC:
563 return unspec == UNSPEC_TPREL && size == 64;
564 case TLS_MODEL_LOCAL_EXEC:
565 return unspec == UNSPEC_TPREL && size == alpha_tls_size;
571 /* Used by aligned_memory_operand and unaligned_memory_operand to
572 resolve what reload is going to do with OP if it's a register. */
575 resolve_reload_operand (rtx op)
577 if (reload_in_progress)
580 if (GET_CODE (tmp) == SUBREG)
581 tmp = SUBREG_REG (tmp);
583 && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
585 op = reg_equiv_memory_loc[REGNO (tmp)];
593 /* The scalar modes supported differ from the default check-what-c-supports
594 version in that sometimes TFmode is available even when long double
595 indicates only DFmode. On Unicos/Mk, we have the situation that HImode
596 doesn't map to any C type, but of course we still support that. */
599 alpha_scalar_mode_supported_p (enum machine_mode mode)
607 case TImode: /* via optabs.c */
615 return TARGET_HAS_XFLOATING_LIBS;
622 /* Alpha implements a couple of integer vector mode operations when
623 TARGET_MAX is enabled. We do not check TARGET_MAX here, however,
624 which allows the vectorizer to operate on e.g. move instructions,
625 or when expand_vector_operations can do something useful. */
628 alpha_vector_mode_supported_p (enum machine_mode mode)
630 return mode == V8QImode || mode == V4HImode || mode == V2SImode;
633 /* Return 1 if this function can directly return via $26. */
638 return (! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK
640 && alpha_sa_size () == 0
641 && get_frame_size () == 0
642 && crtl->outgoing_args_size == 0
643 && crtl->args.pretend_args_size == 0);
646 /* Return the ADDR_VEC associated with a tablejump insn. */
649 alpha_tablejump_addr_vec (rtx insn)
653 tmp = JUMP_LABEL (insn);
656 tmp = NEXT_INSN (tmp);
660 && GET_CODE (PATTERN (tmp)) == ADDR_DIFF_VEC)
661 return PATTERN (tmp);
665 /* Return the label of the predicted edge, or CONST0_RTX if we don't know. */
668 alpha_tablejump_best_label (rtx insn)
670 rtx jump_table = alpha_tablejump_addr_vec (insn);
671 rtx best_label = NULL_RTX;
673 /* ??? Once the CFG doesn't keep getting completely rebuilt, look
674 there for edge frequency counts from profile data. */
678 int n_labels = XVECLEN (jump_table, 1);
682 for (i = 0; i < n_labels; i++)
686 for (j = i + 1; j < n_labels; j++)
687 if (XEXP (XVECEXP (jump_table, 1, i), 0)
688 == XEXP (XVECEXP (jump_table, 1, j), 0))
691 if (count > best_count)
692 best_count = count, best_label = XVECEXP (jump_table, 1, i);
696 return best_label ? best_label : const0_rtx;
699 /* Return the TLS model to use for SYMBOL. */
701 static enum tls_model
702 tls_symbolic_operand_type (rtx symbol)
704 enum tls_model model;
706 if (GET_CODE (symbol) != SYMBOL_REF)
707 return TLS_MODEL_NONE;
708 model = SYMBOL_REF_TLS_MODEL (symbol);
710 /* Local-exec with a 64-bit size is the same code as initial-exec. */
711 if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
712 model = TLS_MODEL_INITIAL_EXEC;
717 /* Return true if the function DECL will share the same GP as any
718 function in the current unit of translation. */
721 decl_has_samegp (const_tree decl)
723 /* Functions that are not local can be overridden, and thus may
724 not share the same gp. */
725 if (!(*targetm.binds_local_p) (decl))
728 /* If -msmall-data is in effect, assume that there is only one GP
729 for the module, and so any local symbol has this property. We
730 need explicit relocations to be able to enforce this for symbols
731 not defined in this unit of translation, however. */
732 if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
735 /* Functions that are not external are defined in this UoT. */
736 /* ??? Irritatingly, static functions not yet emitted are still
737 marked "external". Apply this to non-static functions only. */
738 return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
741 /* Return true if EXP should be placed in the small data section. */
744 alpha_in_small_data_p (const_tree exp)
746 /* We want to merge strings, so we never consider them small data. */
747 if (TREE_CODE (exp) == STRING_CST)
750 /* Functions are never in the small data area. Duh. */
751 if (TREE_CODE (exp) == FUNCTION_DECL)
754 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
756 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
757 if (strcmp (section, ".sdata") == 0
758 || strcmp (section, ".sbss") == 0)
763 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
765 /* If this is an incomplete type with size 0, then we can't put it
766 in sdata because it might be too big when completed. */
767 if (size > 0 && (unsigned HOST_WIDE_INT) size <= g_switch_value)
774 #if TARGET_ABI_OPEN_VMS
776 alpha_linkage_symbol_p (const char *symname)
778 int symlen = strlen (symname);
781 return strcmp (&symname [symlen - 4], "..lk") == 0;
786 #define LINKAGE_SYMBOL_REF_P(X) \
787 ((GET_CODE (X) == SYMBOL_REF \
788 && alpha_linkage_symbol_p (XSTR (X, 0))) \
789 || (GET_CODE (X) == CONST \
790 && GET_CODE (XEXP (X, 0)) == PLUS \
791 && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
792 && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
795 /* legitimate_address_p recognizes an RTL expression that is a valid
796 memory address for an instruction. The MODE argument is the
797 machine mode for the MEM expression that wants to use this address.
799 For Alpha, we have either a constant address or the sum of a
800 register and a constant address, or just a register. For DImode,
801 any of those forms can be surrounded with an AND that clears the
802 low-order three bits; this is an "unaligned" access. */
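/* For instance, the ldq_u form accepted below looks like
   (and (plus (reg) (const_int N)) (const_int -8)), i.e. the address masked
   down to the containing aligned quadword.  */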
805 alpha_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
807 /* If this is an ldq_u type address, discard the outer AND. */
809 && GET_CODE (x) == AND
810 && CONST_INT_P (XEXP (x, 1))
811 && INTVAL (XEXP (x, 1)) == -8)
814 /* Discard non-paradoxical subregs. */
815 if (GET_CODE (x) == SUBREG
816 && (GET_MODE_SIZE (GET_MODE (x))
817 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
820 /* Unadorned general registers are valid. */
823 ? STRICT_REG_OK_FOR_BASE_P (x)
824 : NONSTRICT_REG_OK_FOR_BASE_P (x)))
827 /* Constant addresses (i.e. +/- 32k) are valid. */
828 if (CONSTANT_ADDRESS_P (x))
831 #if TARGET_ABI_OPEN_VMS
832 if (LINKAGE_SYMBOL_REF_P (x))
836 /* Register plus a small constant offset is valid. */
837 if (GET_CODE (x) == PLUS)
839 rtx ofs = XEXP (x, 1);
842 /* Discard non-paradoxical subregs. */
843 if (GET_CODE (x) == SUBREG
844 && (GET_MODE_SIZE (GET_MODE (x))
845 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
851 && NONSTRICT_REG_OK_FP_BASE_P (x)
852 && CONST_INT_P (ofs))
855 ? STRICT_REG_OK_FOR_BASE_P (x)
856 : NONSTRICT_REG_OK_FOR_BASE_P (x))
857 && CONSTANT_ADDRESS_P (ofs))
862 /* If we're managing explicit relocations, LO_SUM is valid, as are small
863 data symbols. Avoid explicit relocations of modes larger than word
864 mode since e.g. $LC0+8($1) can fold around +/- 32k offset.
865 else if (TARGET_EXPLICIT_RELOCS
866 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
868 if (small_symbolic_operand (x, Pmode))
871 if (GET_CODE (x) == LO_SUM)
873 rtx ofs = XEXP (x, 1);
876 /* Discard non-paradoxical subregs. */
877 if (GET_CODE (x) == SUBREG
878 && (GET_MODE_SIZE (GET_MODE (x))
879 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
882 /* Must have a valid base register. */
885 ? STRICT_REG_OK_FOR_BASE_P (x)
886 : NONSTRICT_REG_OK_FOR_BASE_P (x))))
889 /* The symbol must be local. */
890 if (local_symbolic_operand (ofs, Pmode)
891 || dtp32_symbolic_operand (ofs, Pmode)
892 || tp32_symbolic_operand (ofs, Pmode))
900 /* Build the SYMBOL_REF for __tls_get_addr. */
902 static GTY(()) rtx tls_get_addr_libfunc;
905 get_tls_get_addr (void)
907 if (!tls_get_addr_libfunc)
908 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
909 return tls_get_addr_libfunc;
912 /* Try machine-dependent ways of modifying an illegitimate address
913 to be legitimate. If we find one, return the new, valid address. */
916 alpha_legitimize_address_1 (rtx x, rtx scratch, enum machine_mode mode)
918 HOST_WIDE_INT addend;
920 /* If the address is (plus reg const_int) and the CONST_INT is not a
921 valid offset, compute the high part of the constant and add it to
922 the register. Then our address is (plus temp low-part-const). */
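/* Illustrative example: for (plus reg 0x12348765) the split below computes
   low = -0x789b and high = 0x12350000, adds the high part to the register
   (typically a single ldah), and returns (plus temp -0x789b) with the low
   part left in the address.  */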
923 if (GET_CODE (x) == PLUS
924 && REG_P (XEXP (x, 0))
925 && CONST_INT_P (XEXP (x, 1))
926 && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
928 addend = INTVAL (XEXP (x, 1));
933 /* If the address is (const (plus FOO const_int)), find the low-order
934 part of the CONST_INT. Then load FOO plus any high-order part of the
935 CONST_INT into a register. Our address is (plus reg low-part-const).
936 This is done to reduce the number of GOT entries. */
937 if (can_create_pseudo_p ()
938 && GET_CODE (x) == CONST
939 && GET_CODE (XEXP (x, 0)) == PLUS
940 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
942 addend = INTVAL (XEXP (XEXP (x, 0), 1));
943 x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
947 /* If we have a (plus reg const), emit the load as in (2), then add
948 the two registers, and finally generate (plus reg low-part-const) as our address.  */
950 if (can_create_pseudo_p ()
951 && GET_CODE (x) == PLUS
952 && REG_P (XEXP (x, 0))
953 && GET_CODE (XEXP (x, 1)) == CONST
954 && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
955 && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
957 addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
958 x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
959 XEXP (XEXP (XEXP (x, 1), 0), 0),
960 NULL_RTX, 1, OPTAB_LIB_WIDEN);
964 /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
965 Avoid modes larger than word mode since e.g. $LC0+8($1) can fold
966 around +/- 32k offset. */
967 if (TARGET_EXPLICIT_RELOCS
968 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
969 && symbolic_operand (x, Pmode))
971 rtx r0, r16, eqv, tga, tp, insn, dest, seq;
973 switch (tls_symbolic_operand_type (x))
978 case TLS_MODEL_GLOBAL_DYNAMIC:
981 r0 = gen_rtx_REG (Pmode, 0);
982 r16 = gen_rtx_REG (Pmode, 16);
983 tga = get_tls_get_addr ();
984 dest = gen_reg_rtx (Pmode);
985 seq = GEN_INT (alpha_next_sequence_number++);
987 emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
988 insn = gen_call_value_osf_tlsgd (r0, tga, seq);
989 insn = emit_call_insn (insn);
990 RTL_CONST_CALL_P (insn) = 1;
991 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
996 emit_libcall_block (insn, dest, r0, x);
999 case TLS_MODEL_LOCAL_DYNAMIC:
1002 r0 = gen_rtx_REG (Pmode, 0);
1003 r16 = gen_rtx_REG (Pmode, 16);
1004 tga = get_tls_get_addr ();
1005 scratch = gen_reg_rtx (Pmode);
1006 seq = GEN_INT (alpha_next_sequence_number++);
1008 emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
1009 insn = gen_call_value_osf_tlsldm (r0, tga, seq);
1010 insn = emit_call_insn (insn);
1011 RTL_CONST_CALL_P (insn) = 1;
1012 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1014 insn = get_insns ();
1017 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1018 UNSPEC_TLSLDM_CALL);
1019 emit_libcall_block (insn, scratch, r0, eqv);
1021 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
1022 eqv = gen_rtx_CONST (Pmode, eqv);
1024 if (alpha_tls_size == 64)
1026 dest = gen_reg_rtx (Pmode);
1027 emit_insn (gen_rtx_SET (VOIDmode, dest, eqv));
1028 emit_insn (gen_adddi3 (dest, dest, scratch));
1031 if (alpha_tls_size == 32)
1033 insn = gen_rtx_HIGH (Pmode, eqv);
1034 insn = gen_rtx_PLUS (Pmode, scratch, insn);
1035 scratch = gen_reg_rtx (Pmode);
1036 emit_insn (gen_rtx_SET (VOIDmode, scratch, insn));
1038 return gen_rtx_LO_SUM (Pmode, scratch, eqv);
1040 case TLS_MODEL_INITIAL_EXEC:
1041 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1042 eqv = gen_rtx_CONST (Pmode, eqv);
1043 tp = gen_reg_rtx (Pmode);
1044 scratch = gen_reg_rtx (Pmode);
1045 dest = gen_reg_rtx (Pmode);
1047 emit_insn (gen_load_tp (tp));
1048 emit_insn (gen_rtx_SET (VOIDmode, scratch, eqv));
1049 emit_insn (gen_adddi3 (dest, tp, scratch));
1052 case TLS_MODEL_LOCAL_EXEC:
1053 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1054 eqv = gen_rtx_CONST (Pmode, eqv);
1055 tp = gen_reg_rtx (Pmode);
1057 emit_insn (gen_load_tp (tp));
1058 if (alpha_tls_size == 32)
1060 insn = gen_rtx_HIGH (Pmode, eqv);
1061 insn = gen_rtx_PLUS (Pmode, tp, insn);
1062 tp = gen_reg_rtx (Pmode);
1063 emit_insn (gen_rtx_SET (VOIDmode, tp, insn));
1065 return gen_rtx_LO_SUM (Pmode, tp, eqv);
1071 if (local_symbolic_operand (x, Pmode))
1073 if (small_symbolic_operand (x, Pmode))
1077 if (can_create_pseudo_p ())
1078 scratch = gen_reg_rtx (Pmode);
1079 emit_insn (gen_rtx_SET (VOIDmode, scratch,
1080 gen_rtx_HIGH (Pmode, x)));
1081 return gen_rtx_LO_SUM (Pmode, scratch, x);
1090 HOST_WIDE_INT low, high;
1092 low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
1094 high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
1098 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
1099 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1100 1, OPTAB_LIB_WIDEN);
1102 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
1103 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1104 1, OPTAB_LIB_WIDEN);
1106 return plus_constant (x, low);
1111 /* Try machine-dependent ways of modifying an illegitimate address
1112 to be legitimate. Return X or the new, valid address. */
1115 alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1116 enum machine_mode mode)
1118 rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
1119 return new_x ? new_x : x;
1122 /* Primarily this is required for TLS symbols, but given that our move
1123 patterns *ought* to be able to handle any symbol at any time, we
1124 should never be spilling symbolic operands to the constant pool, ever. */
1127 alpha_cannot_force_const_mem (rtx x)
1129 enum rtx_code code = GET_CODE (x);
1130 return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
1133 /* We do not allow indirect calls to be optimized into sibling calls, nor
1134 can we allow a call to a function with a different GP to be optimized into a sibcall.  */
1138 alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1140 /* Can't do indirect tail calls, since we don't know if the target
1141 uses the same GP. */
1145 /* Otherwise, we can make a tail call if the target function shares
1147 return decl_has_samegp (decl);
1151 some_small_symbolic_operand_int (rtx *px, void *data ATTRIBUTE_UNUSED)
1155 /* Don't re-split. */
1156 if (GET_CODE (x) == LO_SUM)
1159 return small_symbolic_operand (x, Pmode) != 0;
1163 split_small_symbolic_operand_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
1167 /* Don't re-split. */
1168 if (GET_CODE (x) == LO_SUM)
1171 if (small_symbolic_operand (x, Pmode))
1173 x = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
1182 split_small_symbolic_operand (rtx x)
1185 for_each_rtx (&x, split_small_symbolic_operand_1, NULL);
1189 /* Indicate that INSN cannot be duplicated. This is true for any insn
1190 that we've marked with gpdisp relocs, since those have to stay in
1191 1-1 correspondence with one another.
1193 Technically we could copy them if we could set up a mapping from one
1194 sequence number to another, across the set of insns to be duplicated.
1195 This seems overly complicated and error-prone since interblock motion
1196 from sched-ebb could move one of the pair of insns to a different block.
1198 Also cannot allow jsr insns to be duplicated. If they throw exceptions,
1199 then they'll be in a different block from their ldgp. Which could lead
1200 the bb reorder code to think that it would be ok to copy just the block
1201 containing the call and branch to the block containing the ldgp. */
1204 alpha_cannot_copy_insn_p (rtx insn)
1206 if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
1208 if (recog_memoized (insn) >= 0)
1209 return get_attr_cannot_copy (insn);
1215 /* Try a machine-dependent way of reloading an illegitimate address
1216 operand. If we find one, push the reload and return the new rtx. */
1219 alpha_legitimize_reload_address (rtx x,
1220 enum machine_mode mode ATTRIBUTE_UNUSED,
1221 int opnum, int type,
1222 int ind_levels ATTRIBUTE_UNUSED)
1224 /* We must recognize output that we have already generated ourselves. */
1225 if (GET_CODE (x) == PLUS
1226 && GET_CODE (XEXP (x, 0)) == PLUS
1227 && REG_P (XEXP (XEXP (x, 0), 0))
1228 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
1229 && CONST_INT_P (XEXP (x, 1)))
1231 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1232 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1233 opnum, (enum reload_type) type);
1237 /* We wish to handle large displacements off a base register by
1238 splitting the addend across an ldah and the mem insn. This
1239 cuts the number of extra insns needed from 3 to 1.  */
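/* For example, a displacement of 0x12345678 is split into high = 0x12340000,
   which is reloaded into a base register (one ldah), and low = 0x5678, which
   stays in the memory reference.  */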
1240 if (GET_CODE (x) == PLUS
1241 && REG_P (XEXP (x, 0))
1242 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
1243 && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
1244 && GET_CODE (XEXP (x, 1)) == CONST_INT)
1246 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
1247 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
1249 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
1251 /* Check for 32-bit overflow. */
1252 if (high + low != val)
1255 /* Reload the high part into a base reg; leave the low part
1256 in the mem directly. */
1257 x = gen_rtx_PLUS (GET_MODE (x),
1258 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
1262 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1263 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1264 opnum, (enum reload_type) type);
1271 /* Compute a (partial) cost for rtx X. Return true if the complete
1272 cost has been computed, and false if subexpressions should be
1273 scanned. In either case, *TOTAL contains the cost result. */
1276 alpha_rtx_costs (rtx x, int code, int outer_code, int *total,
1279 enum machine_mode mode = GET_MODE (x);
1280 bool float_mode_p = FLOAT_MODE_P (mode);
1281 const struct alpha_rtx_cost_data *cost_data;
1284 cost_data = &alpha_rtx_cost_size;
1286 cost_data = &alpha_rtx_cost_data[alpha_tune];
1291 /* If this is an 8-bit constant, return zero since it can be used
1292 nearly anywhere with no cost. If it is a valid operand for an
1293 ADD or AND, likewise return 0 if we know it will be used in that
1294 context. Otherwise, return 2 since it might be used there later.
1295 All other constants take at least two insns. */
1296 if (INTVAL (x) >= 0 && INTVAL (x) < 256)
1304 if (x == CONST0_RTX (mode))
1306 else if ((outer_code == PLUS && add_operand (x, VOIDmode))
1307 || (outer_code == AND && and_operand (x, VOIDmode)))
1309 else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
1312 *total = COSTS_N_INSNS (2);
1318 if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
1319 *total = COSTS_N_INSNS (outer_code != MEM);
1320 else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
1321 *total = COSTS_N_INSNS (1 + (outer_code != MEM));
1322 else if (tls_symbolic_operand_type (x))
1323 /* Estimate of cost for call_pal rduniq. */
1324 /* ??? How many insns do we emit here? More than one... */
1325 *total = COSTS_N_INSNS (15);
1327 /* Otherwise we do a load from the GOT. */
1328 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1332 /* This is effectively an add_operand. */
1339 *total = cost_data->fp_add;
1340 else if (GET_CODE (XEXP (x, 0)) == MULT
1341 && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
1343 *total = (rtx_cost (XEXP (XEXP (x, 0), 0),
1344 (enum rtx_code) outer_code, speed)
1345 + rtx_cost (XEXP (x, 1),
1346 (enum rtx_code) outer_code, speed)
1347 + COSTS_N_INSNS (1));
1354 *total = cost_data->fp_mult;
1355 else if (mode == DImode)
1356 *total = cost_data->int_mult_di;
1358 *total = cost_data->int_mult_si;
1362 if (CONST_INT_P (XEXP (x, 1))
1363 && INTVAL (XEXP (x, 1)) <= 3)
1365 *total = COSTS_N_INSNS (1);
1372 *total = cost_data->int_shift;
1377 *total = cost_data->fp_add;
1379 *total = cost_data->int_cmov;
1387 *total = cost_data->int_div;
1388 else if (mode == SFmode)
1389 *total = cost_data->fp_div_sf;
1391 *total = cost_data->fp_div_df;
1395 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1401 *total = COSTS_N_INSNS (1);
1409 *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
1415 case UNSIGNED_FLOAT:
1418 case FLOAT_TRUNCATE:
1419 *total = cost_data->fp_add;
1423 if (MEM_P (XEXP (x, 0)))
1426 *total = cost_data->fp_add;
1434 /* REF is an alignable memory location. Place an aligned SImode
1435 reference into *PALIGNED_MEM and the number of bits to shift into
1436 *PBITNUM. SCRATCH is a free register for use in reloading out
1437 of range stack slots. */
1440 get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
1443 HOST_WIDE_INT disp, offset;
1445 gcc_assert (MEM_P (ref));
1447 if (reload_in_progress
1448 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1450 base = find_replacement (&XEXP (ref, 0));
1451 gcc_assert (memory_address_p (GET_MODE (ref), base));
1454 base = XEXP (ref, 0);
1456 if (GET_CODE (base) == PLUS)
1457 disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1461 /* Find the byte offset within an aligned word. If the memory itself is
1462 claimed to be aligned, believe it. Otherwise, aligned_memory_operand
1463 will have examined the base register and determined it is aligned, and
1464 thus displacements from it are naturally alignable. */
1465 if (MEM_ALIGN (ref) >= 32)
1470 /* Access the entire aligned word. */
1471 *paligned_mem = widen_memory_access (ref, SImode, -offset);
1473 /* Convert the byte offset within the word to a bit offset. */
1474 if (WORDS_BIG_ENDIAN)
1475 offset = 32 - (GET_MODE_BITSIZE (GET_MODE (ref)) + offset * 8);
1478 *pbitnum = GEN_INT (offset);
1481 /* Similar, but just get the address.  Handle the two reload cases.  */
1485 get_unaligned_address (rtx ref)
1488 HOST_WIDE_INT offset = 0;
1490 gcc_assert (MEM_P (ref));
1492 if (reload_in_progress
1493 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1495 base = find_replacement (&XEXP (ref, 0));
1497 gcc_assert (memory_address_p (GET_MODE (ref), base));
1500 base = XEXP (ref, 0);
1502 if (GET_CODE (base) == PLUS)
1503 offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1505 return plus_constant (base, offset);
1508 /* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
1509 X is always returned in a register. */
1512 get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
1514 if (GET_CODE (addr) == PLUS)
1516 ofs += INTVAL (XEXP (addr, 1));
1517 addr = XEXP (addr, 0);
1520 return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
1521 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1524 /* On the Alpha, all (non-symbolic) constants except zero go into
1525 a floating-point register via memory. Note that we cannot
1526 return anything that is not a subset of RCLASS, and that some
1527 symbolic constants cannot be dropped to memory. */
1530 alpha_preferred_reload_class(rtx x, enum reg_class rclass)
1532 /* Zero is present in any register class. */
1533 if (x == CONST0_RTX (GET_MODE (x)))
1536 /* These sorts of constants we can easily drop to memory. */
1538 || GET_CODE (x) == CONST_DOUBLE
1539 || GET_CODE (x) == CONST_VECTOR)
1541 if (rclass == FLOAT_REGS)
1543 if (rclass == ALL_REGS)
1544 return GENERAL_REGS;
1548 /* All other kinds of constants should not (and in the case of HIGH
1549 cannot) be dropped to memory -- instead we use a GENERAL_REGS
1550 secondary reload. */
1552 return (rclass == ALL_REGS ? GENERAL_REGS : rclass);
1557 /* Inform reload about cases where moving X with a mode MODE to a register in
1558 RCLASS requires an extra scratch or immediate register. Return the class
1559 needed for the immediate register. */
1561 static enum reg_class
1562 alpha_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
1563 enum machine_mode mode, secondary_reload_info *sri)
1565 /* Loading and storing HImode or QImode values to and from memory
1566 usually requires a scratch register. */
1567 if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
1569 if (any_memory_operand (x, mode))
1573 if (!aligned_memory_operand (x, mode))
1574 sri->icode = reload_in_optab[mode];
1577 sri->icode = reload_out_optab[mode];
1582 /* We also cannot do integral arithmetic into FP regs, as might result
1583 from register elimination into a DImode fp register. */
1584 if (rclass == FLOAT_REGS)
1586 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
1587 return GENERAL_REGS;
1588 if (in_p && INTEGRAL_MODE_P (mode)
1589 && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
1590 return GENERAL_REGS;
1596 /* Subfunction of the following function. Update the flags of any MEM
1597 found in part of X. */
1600 alpha_set_memflags_1 (rtx *xp, void *data)
1602 rtx x = *xp, orig = (rtx) data;
1607 MEM_VOLATILE_P (x) = MEM_VOLATILE_P (orig);
1608 MEM_IN_STRUCT_P (x) = MEM_IN_STRUCT_P (orig);
1609 MEM_SCALAR_P (x) = MEM_SCALAR_P (orig);
1610 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (orig);
1611 MEM_READONLY_P (x) = MEM_READONLY_P (orig);
1613 /* Sadly, we cannot use alias sets because the extra aliasing
1614 produced by the AND interferes. Given that two-byte quantities
1615 are the only thing we would be able to differentiate anyway,
1616 there does not seem to be any point in convoluting the early
1617 out of the alias check. */
1622 /* Given SEQ, which is an INSN list, look for any MEMs in either
1623 a SET_DEST or a SET_SRC and copy the in-struct, unchanging, and
1624 volatile flags from REF into each of the MEMs found. If REF is not
1625 a MEM, don't do anything. */
1628 alpha_set_memflags (rtx seq, rtx ref)
1635 /* This is only called from alpha.md, after having had something
1636 generated from one of the insn patterns. So if everything is
1637 zero, the pattern is already up-to-date. */
1638 if (!MEM_VOLATILE_P (ref)
1639 && !MEM_IN_STRUCT_P (ref)
1640 && !MEM_SCALAR_P (ref)
1641 && !MEM_NOTRAP_P (ref)
1642 && !MEM_READONLY_P (ref))
1645 for (insn = seq; insn; insn = NEXT_INSN (insn))
1647 for_each_rtx (&PATTERN (insn), alpha_set_memflags_1, (void *) ref);
1652 static rtx alpha_emit_set_const (rtx, enum machine_mode, HOST_WIDE_INT,
1655 /* Internal routine for alpha_emit_set_const to check for N or below insns.
1656 If NO_OUTPUT is true, then we only check to see if N insns are possible,
1657 and return pc_rtx if successful. */
1660 alpha_emit_set_const_1 (rtx target, enum machine_mode mode,
1661 HOST_WIDE_INT c, int n, bool no_output)
1663 HOST_WIDE_INT new_const;
1665 /* Use a pseudo if highly optimizing and still generating RTL. */
1667 = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
1670 /* If this is a sign-extended 32-bit constant, we can do this in at most
1671 three insns, so do it if we have enough insns left. We always have
1672 a sign-extended 32-bit constant when compiling on a narrow machine. */
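/* Illustrative example: c = 0x1234abcd gives low = -0x5433 and high = 0x1235,
   so the value is built roughly as ldah t,0x1235($31) followed by
   lda t,-0x5433(t), i.e. two insns with no EXTRA adjustment needed.  */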
1674 if (HOST_BITS_PER_WIDE_INT != 64
1675 || c >> 31 == -1 || c >> 31 == 0)
1677 HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
1678 HOST_WIDE_INT tmp1 = c - low;
1679 HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
1680 HOST_WIDE_INT extra = 0;
1682 /* If HIGH will be interpreted as negative but the constant is
1683 positive, we must adjust it to do two ldah insns.  */
1685 if ((high & 0x8000) != 0 && c >= 0)
1689 high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
1692 if (c == low || (low == 0 && extra == 0))
1694 /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
1695 but that meant that we can't handle INT_MIN on 32-bit machines
1696 (like NT/Alpha), because we recurse indefinitely through
1697 emit_move_insn to gen_movdi. So instead, since we know exactly
1698 what we want, create it explicitly. */
1703 target = gen_reg_rtx (mode);
1704 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (c)));
1707 else if (n >= 2 + (extra != 0))
1711 if (!can_create_pseudo_p ())
1713 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (high << 16)));
1717 temp = copy_to_suggested_reg (GEN_INT (high << 16),
1720 /* As of 2002-02-23, addsi3 is only available when not optimizing.
1721 This means that if we go through expand_binop, we'll try to
1722 generate extensions, etc, which will require new pseudos, which
1723 will fail during some split phases. The SImode add patterns
1724 still exist, but are not named. So build the insns by hand. */
1729 subtarget = gen_reg_rtx (mode);
1730 insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
1731 insn = gen_rtx_SET (VOIDmode, subtarget, insn);
1737 target = gen_reg_rtx (mode);
1738 insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1739 insn = gen_rtx_SET (VOIDmode, target, insn);
1745 /* If we couldn't do it that way, try some other methods. But if we have
1746 no instructions left, don't bother. Likewise, if this is SImode and
1747 we can't make pseudos, we can't do anything since the expand_binop
1748 and expand_unop calls will widen and try to make pseudos. */
1750 if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
1753 /* Next, see if we can load a related constant and then shift and possibly
1754 negate it to get the constant we want.  Try this once with each
1755 increasing number of insns.  */
1757 for (i = 1; i < n; i++)
1759 /* First, see if minus some low bits, we've an easy load of the high bits.  */
1762 new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
1765 temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
1770 return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
1771 target, 0, OPTAB_WIDEN);
1775 /* Next try complementing. */
1776 temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
1781 return expand_unop (mode, one_cmpl_optab, temp, target, 0);
1784 /* Next try to form a constant and do a left shift. We can do this
1785 if some low-order bits are zero; the exact_log2 call below tells
1786 us that information. The bits we are shifting out could be any
1787 value, but here we'll just try the 0- and sign-extended forms of
1788 the constant. To try to increase the chance of having the same
1789 constant in more than one insn, start at the highest number of
1790 bits to shift, but try all possibilities in case a ZAPNOT will be useful.  */
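/* E.g. c = 0x12340000 has 16 low-order zero bits; 0x1234 loads with a single
   lda, and a shift left by 16 then produces c, two insns in total.  */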
1793 bits = exact_log2 (c & -c);
1795 for (; bits > 0; bits--)
1797 new_const = c >> bits;
1798 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1801 new_const = (unsigned HOST_WIDE_INT)c >> bits;
1802 temp = alpha_emit_set_const (subtarget, mode, new_const,
1809 return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
1810 target, 0, OPTAB_WIDEN);
1814 /* Now try high-order zero bits. Here we try the shifted-in bits as
1815 all zero and all ones. Be careful to avoid shifting outside the
1816 mode and to avoid shifting outside the host wide int size. */
1817 /* On narrow hosts, don't shift a 1 into the high bit, since we'll
1818 confuse the recursive call and set all of the high 32 bits. */
1820 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1821 - floor_log2 (c) - 1 - (HOST_BITS_PER_WIDE_INT < 64));
1823 for (; bits > 0; bits--)
1825 new_const = c << bits;
1826 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1829 new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1830 temp = alpha_emit_set_const (subtarget, mode, new_const,
1837 return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
1838 target, 1, OPTAB_WIDEN);
1842 /* Now try high-order 1 bits. We get that with a sign-extension.
1843 But one bit isn't enough here. Be careful to avoid shifting outside
1844 the mode and to avoid shifting outside the host wide int size. */
1846 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1847 - floor_log2 (~ c) - 2);
1849 for (; bits > 0; bits--)
1851 new_const = c << bits;
1852 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1855 new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1856 temp = alpha_emit_set_const (subtarget, mode, new_const,
1863 return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
1864 target, 0, OPTAB_WIDEN);
1869 #if HOST_BITS_PER_WIDE_INT == 64
1870 /* Finally, see if we can load a value into the target that is the same as the
1871 constant except that all bytes that are 0 are changed to be 0xff. If we
1872 can, then we can do a ZAPNOT to obtain the desired constant. */
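/* E.g. for c = 0xff00ffffffffffff the candidate value is all-ones, loadable
   with one lda, and the resulting AND mask is c itself, a zap mask clearing
   byte 6, so c is built in two insns.  */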
1875 for (i = 0; i < 64; i += 8)
1876 if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
1877 new_const |= (HOST_WIDE_INT) 0xff << i;
1879 /* We are only called for SImode and DImode. If this is SImode, ensure that
1880 we are sign extended to a full word. */
1883 new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;
1887 temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
1892 return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
1893 target, 0, OPTAB_WIDEN);
1901 /* Try to output insns to set TARGET equal to the constant C if it can be
1902 done in less than N insns. Do all computations in MODE. Returns the place
1903 where the output has been placed if it can be done and the insns have been
1904 emitted. If it would take more than N insns, zero is returned and no
1905 insns are emitted.  */
1908 alpha_emit_set_const (rtx target, enum machine_mode mode,
1909 HOST_WIDE_INT c, int n, bool no_output)
1911 enum machine_mode orig_mode = mode;
1912 rtx orig_target = target;
1916 /* If we can't make any pseudos, TARGET is an SImode hard register, and we
1917 can't load this constant in one insn, do this in DImode. */
1918 if (!can_create_pseudo_p () && mode == SImode
1919 && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
1921 result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
1925 target = no_output ? NULL : gen_lowpart (DImode, target);
1928 else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
1930 target = no_output ? NULL : gen_lowpart (DImode, target);
1934 /* Try 1 insn, then 2, then up to N. */
1935 for (i = 1; i <= n; i++)
1937 result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
1945 insn = get_last_insn ();
1946 set = single_set (insn);
1947 if (! CONSTANT_P (SET_SRC (set)))
1948 set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
1953 /* Allow for the case where we changed the mode of TARGET. */
1956 if (result == target)
1957 result = orig_target;
1958 else if (mode != orig_mode)
1959 result = gen_lowpart (orig_mode, result);
1965 /* Having failed to find a 3 insn sequence in alpha_emit_set_const,
1966 fall back to a straightforward decomposition.  We do this to avoid
1967 exponential run times encountered when looking for longer sequences
1968 with alpha_emit_set_const. */
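/* On a 64-bit host the decomposition below satisfies
   c1 == (((d4 + d3) << 32) + d2 + d1): d1 and d3 fit in a signed 16-bit lda
   displacement, while d2 and d4 have their low 16 bits clear (ldah material),
   so the constant is rebuilt in at most five insns (ldah, lda, sll, ldah, lda).  */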
1971 alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
1973 HOST_WIDE_INT d1, d2, d3, d4;
1975 /* Decompose the entire word */
1976 #if HOST_BITS_PER_WIDE_INT >= 64
1977 gcc_assert (c2 == -(c1 < 0));
1978 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1980 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1981 c1 = (c1 - d2) >> 32;
1982 d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1984 d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1985 gcc_assert (c1 == d4);
1987 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1989 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1990 gcc_assert (c1 == d2);
1992 d3 = ((c2 & 0xffff) ^ 0x8000) - 0x8000;
1994 d4 = ((c2 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1995 gcc_assert (c2 == d4);
1998 /* Construct the high word */
2001 emit_move_insn (target, GEN_INT (d4));
2003 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
2006 emit_move_insn (target, GEN_INT (d3));
2008 /* Shift it into place */
2009 emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
2011 /* Add in the low bits. */
2013 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
2015 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
2020 /* Given an integral CONST_INT, CONST_DOUBLE, or CONST_VECTOR, return its value as two HOST_WIDE_INT halves in *P0 (low) and *P1 (high).  */
2024 alpha_extract_integer (rtx x, HOST_WIDE_INT *p0, HOST_WIDE_INT *p1)
2026 HOST_WIDE_INT i0, i1;
2028 if (GET_CODE (x) == CONST_VECTOR)
2029 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
2032 if (CONST_INT_P (x))
2037 else if (HOST_BITS_PER_WIDE_INT >= 64)
2039 i0 = CONST_DOUBLE_LOW (x);
2044 i0 = CONST_DOUBLE_LOW (x);
2045 i1 = CONST_DOUBLE_HIGH (x);
2052 /* Implement LEGITIMATE_CONSTANT_P. This is all constants for which we
2053 are willing to load the value into a register via a move pattern.
2054 Normally this is all symbolic constants, integral constants that
2055 take three or fewer instructions, and floating-point zero. */
2058 alpha_legitimate_constant_p (rtx x)
2060 enum machine_mode mode = GET_MODE (x);
2061 HOST_WIDE_INT i0, i1;
2063 switch (GET_CODE (x))
2070 if (GET_CODE (XEXP (x, 0)) == PLUS
2071 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
2072 x = XEXP (XEXP (x, 0), 0);
2076 if (GET_CODE (x) != SYMBOL_REF)
2082 /* TLS symbols are never valid. */
2083 return SYMBOL_REF_TLS_MODEL (x) == 0;
2086 if (x == CONST0_RTX (mode))
2088 if (FLOAT_MODE_P (mode))
2093 if (x == CONST0_RTX (mode))
2095 if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
2097 if (GET_MODE_SIZE (mode) != 8)
2103 if (TARGET_BUILD_CONSTANTS)
2105 alpha_extract_integer (x, &i0, &i1);
2106 if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == (-i0 < 0))
2107 return alpha_emit_set_const_1 (x, mode, i0, 3, true) != NULL;
2115 /* Operand 1 is known to be a constant, and should require more than one
2116 instruction to load. Emit that multi-part load. */
2119 alpha_split_const_mov (enum machine_mode mode, rtx *operands)
2121 HOST_WIDE_INT i0, i1;
2122 rtx temp = NULL_RTX;
2124 alpha_extract_integer (operands[1], &i0, &i1);
2126 if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0))
2127 temp = alpha_emit_set_const (operands[0], mode, i0, 3, false);
2129 if (!temp && TARGET_BUILD_CONSTANTS)
2130 temp = alpha_emit_set_long_const (operands[0], i0, i1);
2134 if (!rtx_equal_p (operands[0], temp))
2135 emit_move_insn (operands[0], temp);
2142 /* Expand a move instruction; return true if all work is done.
2143 We don't handle non-bwx subword loads here. */
2146 alpha_expand_mov (enum machine_mode mode, rtx *operands)
2150 /* If the output is not a register, the input must be. */
2151 if (MEM_P (operands[0])
2152 && ! reg_or_0_operand (operands[1], mode))
2153 operands[1] = force_reg (mode, operands[1]);
2155 /* Allow legitimize_address to perform some simplifications. */
2156 if (mode == Pmode && symbolic_operand (operands[1], mode))
2158 tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode);
2161 if (tmp == operands[0])
2168 /* Early out for non-constants and valid constants. */
2169 if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode))
2172 /* Split large integers. */
2173 if (CONST_INT_P (operands[1])
2174 || GET_CODE (operands[1]) == CONST_DOUBLE
2175 || GET_CODE (operands[1]) == CONST_VECTOR)
2177 if (alpha_split_const_mov (mode, operands))
2181 /* Otherwise we've nothing left but to drop the thing to memory. */
2182 tmp = force_const_mem (mode, operands[1]);
2184 if (tmp == NULL_RTX)
2187 if (reload_in_progress)
2189 emit_move_insn (operands[0], XEXP (tmp, 0));
2190 operands[1] = replace_equiv_address (tmp, operands[0]);
2193 operands[1] = validize_mem (tmp);
2197 /* Expand a non-bwx QImode or HImode move instruction;
2198 return true if all work is done. */
2201 alpha_expand_mov_nobwx (enum machine_mode mode, rtx *operands)
2205 /* If the output is not a register, the input must be. */
2206 if (MEM_P (operands[0]))
2207 operands[1] = force_reg (mode, operands[1]);
2209 /* Handle four memory cases, unaligned and aligned for either the input
2210 or the output. The only case where we can be called during reload is
2211 for aligned loads; all other cases require temporaries. */
2213 if (any_memory_operand (operands[1], mode))
2215 if (aligned_memory_operand (operands[1], mode))
2217 if (reload_in_progress)
2220 seq = gen_reload_inqi_aligned (operands[0], operands[1]);
2222 seq = gen_reload_inhi_aligned (operands[0], operands[1]);
2227 rtx aligned_mem, bitnum;
2228 rtx scratch = gen_reg_rtx (SImode);
2232 get_aligned_mem (operands[1], &aligned_mem, &bitnum);
2234 subtarget = operands[0];
2235 if (REG_P (subtarget))
2236 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2238 subtarget = gen_reg_rtx (DImode), copyout = true;
2241 seq = gen_aligned_loadqi (subtarget, aligned_mem,
2244 seq = gen_aligned_loadhi (subtarget, aligned_mem,
2249 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2254 /* Don't pass these as parameters since that makes the generated
2255 code depend on parameter evaluation order which will cause
2256 bootstrap failures. */
2258 rtx temp1, temp2, subtarget, ua;
2261 temp1 = gen_reg_rtx (DImode);
2262 temp2 = gen_reg_rtx (DImode);
2264 subtarget = operands[0];
2265 if (REG_P (subtarget))
2266 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2268 subtarget = gen_reg_rtx (DImode), copyout = true;
2270 ua = get_unaligned_address (operands[1]);
2272 seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2);
2274 seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2);
2276 alpha_set_memflags (seq, operands[1]);
2280 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2285 if (any_memory_operand (operands[0], mode))
2287 if (aligned_memory_operand (operands[0], mode))
2289 rtx aligned_mem, bitnum;
2290 rtx temp1 = gen_reg_rtx (SImode);
2291 rtx temp2 = gen_reg_rtx (SImode);
2293 get_aligned_mem (operands[0], &aligned_mem, &bitnum);
2295 emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
2300 rtx temp1 = gen_reg_rtx (DImode);
2301 rtx temp2 = gen_reg_rtx (DImode);
2302 rtx temp3 = gen_reg_rtx (DImode);
2303 rtx ua = get_unaligned_address (operands[0]);
2306 seq = gen_unaligned_storeqi (ua, operands[1], temp1, temp2, temp3);
2308 seq = gen_unaligned_storehi (ua, operands[1], temp1, temp2, temp3);
2310 alpha_set_memflags (seq, operands[0]);
2319 /* Implement the movmisalign patterns. One of the operands is a memory
2320 that is not naturally aligned. Emit instructions to load it. */
2323 alpha_expand_movmisalign (enum machine_mode mode, rtx *operands)
2325 /* Honor misaligned loads, for those we promised to do so. */
2326 if (MEM_P (operands[1]))
2330 if (register_operand (operands[0], mode))
2333 tmp = gen_reg_rtx (mode);
2335 alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
2336 if (tmp != operands[0])
2337 emit_move_insn (operands[0], tmp);
2339 else if (MEM_P (operands[0]))
2341 if (!reg_or_0_operand (operands[1], mode))
2342 operands[1] = force_reg (mode, operands[1]);
2343 alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
2349 /* Generate an unsigned DImode to FP conversion. This is the same code
2350 optabs would emit if we didn't have TFmode patterns.
2352 For SFmode, this is the only construction I've found that can pass
2353 gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode
2354 intermediates will work, because you'll get intermediate rounding
2355 that ruins the end result. Some of this could be fixed by turning
2356 on round-to-positive-infinity, but that requires diddling the fpsr,
2357 which kills performance. I tried turning this around and converting
2358 to a negative number, so that I could turn on /m, but either I did
2359    it wrong or there's something else, because I wound up with the exact
2360 same single-bit error. There is a branch-less form of this same code:
2371 fcmoveq $f10,$f11,$f0
2373 I'm not using it because it's the same number of instructions as
2374 this branch-full form, and it has more serialized long latency
2375 instructions on the critical path.
2377 For DFmode, we can avoid rounding errors by breaking up the word
2378 into two pieces, converting them separately, and adding them back:
2380 LC0: .long 0,0x5f800000
2385 cpyse $f11,$f31,$f10
2386 cpyse $f31,$f11,$f11
2394 This doesn't seem to be a clear-cut win over the optabs form.
2395 It probably all depends on the distribution of numbers being
2396    converted -- in the optabs form, all but the high-bit-set case have a
2397 much lower minimum execution time. */
2400 alpha_emit_floatuns (rtx operands[2])
2402 rtx neglab, donelab, i0, i1, f0, in, out;
2403 enum machine_mode mode;
2406 in = force_reg (DImode, operands[1]);
2407 mode = GET_MODE (out);
2408 neglab = gen_label_rtx ();
2409 donelab = gen_label_rtx ();
2410 i0 = gen_reg_rtx (DImode);
2411 i1 = gen_reg_rtx (DImode);
2412 f0 = gen_reg_rtx (mode);
2414 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
2416 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
2417 emit_jump_insn (gen_jump (donelab));
2420 emit_label (neglab);
2422 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
2423 emit_insn (gen_anddi3 (i1, in, const1_rtx));
2424 emit_insn (gen_iordi3 (i0, i0, i1));
2425 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
2426 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
2428 emit_label (donelab);
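/* A rough C equivalent of the expansion above (illustrative only; the
   real code of course emits RTL rather than C).  For a high-bit-set
   input we halve, fold the low bit back in so the final rounding is
   still correct, convert as a signed value, then double:

     double
     floatuns (unsigned long x)
     {
       if ((long) x >= 0)
         return (double) (long) x;
       unsigned long half = (x >> 1) | (x & 1);
       return 2.0 * (double) (long) half;
     }
*/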
2431 /* Generate the comparison for a conditional branch. */
2434 alpha_emit_conditional_branch (rtx operands[], enum machine_mode cmp_mode)
2436 enum rtx_code cmp_code, branch_code;
2437 enum machine_mode branch_mode = VOIDmode;
2438 enum rtx_code code = GET_CODE (operands[0]);
2439 rtx op0 = operands[1], op1 = operands[2];
2442 if (cmp_mode == TFmode)
2444 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2449 /* The general case: fold the comparison code to the types of compares
2450 that we have, choosing the branch as necessary. */
2453 case EQ: case LE: case LT: case LEU: case LTU:
2455 /* We have these compares: */
2456 cmp_code = code, branch_code = NE;
2461 /* These must be reversed. */
2462 cmp_code = reverse_condition (code), branch_code = EQ;
2465 case GE: case GT: case GEU: case GTU:
2466 /* For FP, we swap them, for INT, we reverse them. */
2467 if (cmp_mode == DFmode)
2469 cmp_code = swap_condition (code);
2471 tem = op0, op0 = op1, op1 = tem;
2475 cmp_code = reverse_condition (code);
2484 if (cmp_mode == DFmode)
2486 if (flag_unsafe_math_optimizations && cmp_code != UNORDERED)
2488 /* When we are not as concerned about non-finite values, and we
2489 are comparing against zero, we can branch directly. */
2490 if (op1 == CONST0_RTX (DFmode))
2491 cmp_code = UNKNOWN, branch_code = code;
2492 else if (op0 == CONST0_RTX (DFmode))
2494 /* Undo the swap we probably did just above. */
2495 tem = op0, op0 = op1, op1 = tem;
2496 branch_code = swap_condition (cmp_code);
2502 /* ??? We mark the branch mode to be CCmode to prevent the
2503 compare and branch from being combined, since the compare
2504 insn follows IEEE rules that the branch does not. */
2505 branch_mode = CCmode;
2510 /* The following optimizations are only for signed compares. */
2511 if (code != LEU && code != LTU && code != GEU && code != GTU)
2513 /* Whee. Compare and branch against 0 directly. */
2514 if (op1 == const0_rtx)
2515 cmp_code = UNKNOWN, branch_code = code;
2517	 /* If the constant doesn't fit into an immediate, but can
2518 be generated by lda/ldah, we adjust the argument and
2519 compare against zero, so we can use beq/bne directly. */
2520 /* ??? Don't do this when comparing against symbols, otherwise
2521 we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
2522 be declared false out of hand (at least for non-weak). */
2523 else if (CONST_INT_P (op1)
2524 && (code == EQ || code == NE)
2525 && !(symbolic_operand (op0, VOIDmode)
2526 || (REG_P (op0) && REG_POINTER (op0))))
2528 rtx n_op1 = GEN_INT (-INTVAL (op1));
2530 if (! satisfies_constraint_I (op1)
2531 && (satisfies_constraint_K (n_op1)
2532 || satisfies_constraint_L (n_op1)))
2533 cmp_code = PLUS, branch_code = code, op1 = n_op1;
2537 if (!reg_or_0_operand (op0, DImode))
2538 op0 = force_reg (DImode, op0);
2539 if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
2540 op1 = force_reg (DImode, op1);
2543 /* Emit an initial compare instruction, if necessary. */
2545 if (cmp_code != UNKNOWN)
2547 tem = gen_reg_rtx (cmp_mode);
2548 emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
2551 /* Emit the branch instruction. */
2552 tem = gen_rtx_SET (VOIDmode, pc_rtx,
2553 gen_rtx_IF_THEN_ELSE (VOIDmode,
2554 gen_rtx_fmt_ee (branch_code,
2556 CONST0_RTX (cmp_mode)),
2557 gen_rtx_LABEL_REF (VOIDmode,
2560 emit_jump_insn (tem);
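/* Hypothetical example of the constant adjustment above: for

     if (x == 1000) ...

   the value 1000 does not fit the unsigned 8-bit literal field of
   CMPEQ, but -1000 satisfies the 'K' (lda) constraint, so instead of
   loading 1000 into a register we emit roughly

     lda  $t,-1000($x)
     beq  $t,label

   that is, (x == 1000) is rewritten as (x + -1000 == 0), which is what
   setting cmp_code = PLUS arranges.  Register names here are
   illustrative.  */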
2563 /* Certain simplifications can be done to make invalid setcc operations
2564    valid.  Return the final comparison, or NULL if we can't make it work.  */
2567 alpha_emit_setcc (rtx operands[], enum machine_mode cmp_mode)
2569 enum rtx_code cmp_code;
2570 enum rtx_code code = GET_CODE (operands[1]);
2571 rtx op0 = operands[2], op1 = operands[3];
2574 if (cmp_mode == TFmode)
2576 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2581 if (cmp_mode == DFmode && !TARGET_FIX)
2584 /* The general case: fold the comparison code to the types of compares
2585 that we have, choosing the branch as necessary. */
2590 case EQ: case LE: case LT: case LEU: case LTU:
2592 /* We have these compares. */
2593 if (cmp_mode == DFmode)
2594 cmp_code = code, code = NE;
2598 if (cmp_mode == DImode && op1 == const0_rtx)
2603 cmp_code = reverse_condition (code);
2607 case GE: case GT: case GEU: case GTU:
2608 /* These normally need swapping, but for integer zero we have
2609 special patterns that recognize swapped operands. */
2610 if (cmp_mode == DImode && op1 == const0_rtx)
2612 code = swap_condition (code);
2613 if (cmp_mode == DFmode)
2614 cmp_code = code, code = NE;
2615 tmp = op0, op0 = op1, op1 = tmp;
2622 if (cmp_mode == DImode)
2624 if (!register_operand (op0, DImode))
2625 op0 = force_reg (DImode, op0);
2626 if (!reg_or_8bit_operand (op1, DImode))
2627 op1 = force_reg (DImode, op1);
2630 /* Emit an initial compare instruction, if necessary. */
2631 if (cmp_code != UNKNOWN)
2633 tmp = gen_reg_rtx (cmp_mode);
2634 emit_insn (gen_rtx_SET (VOIDmode, tmp,
2635 gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1)));
2637 op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp;
2641 /* Emit the setcc instruction. */
2642 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2643 gen_rtx_fmt_ee (code, DImode, op0, op1)));
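/* For instance, "r = (x >= y)" on DImode operands has no direct cmpge
   instruction, so the operands are swapped above and we end up with
   roughly "cmple $y,$x,$r", which computes the same predicate
   (register names illustrative).  */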
2648 /* Rewrite a comparison against zero CMP of the form
2649 (CODE (cc0) (const_int 0)) so it can be written validly in
2650 a conditional move (if_then_else CMP ...).
2651 If both of the operands that set cc0 are nonzero we must emit
2652 an insn to perform the compare (it can't be done within
2653 the conditional move). */
2656 alpha_emit_conditional_move (rtx cmp, enum machine_mode mode)
2658 enum rtx_code code = GET_CODE (cmp);
2659 enum rtx_code cmov_code = NE;
2660 rtx op0 = XEXP (cmp, 0);
2661 rtx op1 = XEXP (cmp, 1);
2662 enum machine_mode cmp_mode
2663 = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
2664 enum machine_mode cmov_mode = VOIDmode;
2665 int local_fast_math = flag_unsafe_math_optimizations;
2668 gcc_assert (cmp_mode == DFmode || cmp_mode == DImode);
2670 if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode))
2672 enum rtx_code cmp_code;
2677 /* If we have fp<->int register move instructions, do a cmov by
2678 performing the comparison in fp registers, and move the
2679 zero/nonzero value to integer registers, where we can then
2680 use a normal cmov, or vice-versa. */
2684 case EQ: case LE: case LT: case LEU: case LTU:
2685 /* We have these compares. */
2686 cmp_code = code, code = NE;
2690 /* This must be reversed. */
2691 cmp_code = EQ, code = EQ;
2694 case GE: case GT: case GEU: case GTU:
2695 /* These normally need swapping, but for integer zero we have
2696 special patterns that recognize swapped operands. */
2697 if (cmp_mode == DImode && op1 == const0_rtx)
2698 cmp_code = code, code = NE;
2701 cmp_code = swap_condition (code);
2703 tem = op0, op0 = op1, op1 = tem;
2711 tem = gen_reg_rtx (cmp_mode);
2712 emit_insn (gen_rtx_SET (VOIDmode, tem,
2713 gen_rtx_fmt_ee (cmp_code, cmp_mode,
2716 cmp_mode = cmp_mode == DImode ? DFmode : DImode;
2717 op0 = gen_lowpart (cmp_mode, tem);
2718 op1 = CONST0_RTX (cmp_mode);
2719 local_fast_math = 1;
2722 /* We may be able to use a conditional move directly.
2723 This avoids emitting spurious compares. */
2724 if (signed_comparison_operator (cmp, VOIDmode)
2725 && (cmp_mode == DImode || local_fast_math)
2726 && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
2727 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2729 /* We can't put the comparison inside the conditional move;
2730 emit a compare instruction and put that inside the
2731 conditional move. Make sure we emit only comparisons we have;
2732 swap or reverse as necessary. */
2734 if (!can_create_pseudo_p ())
2739 case EQ: case LE: case LT: case LEU: case LTU:
2740 /* We have these compares: */
2744 /* This must be reversed. */
2745 code = reverse_condition (code);
2749 case GE: case GT: case GEU: case GTU:
2750 /* These must be swapped. */
2751 if (op1 != CONST0_RTX (cmp_mode))
2753 code = swap_condition (code);
2754 tem = op0, op0 = op1, op1 = tem;
2762 if (cmp_mode == DImode)
2764 if (!reg_or_0_operand (op0, DImode))
2765 op0 = force_reg (DImode, op0);
2766 if (!reg_or_8bit_operand (op1, DImode))
2767 op1 = force_reg (DImode, op1);
2770 /* ??? We mark the branch mode to be CCmode to prevent the compare
2771 and cmov from being combined, since the compare insn follows IEEE
2772 rules that the cmov does not. */
2773 if (cmp_mode == DFmode && !local_fast_math)
2776 tem = gen_reg_rtx (cmp_mode);
2777 emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1));
2778 return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode));
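/* A concrete, purely illustrative example of the cross-mode case
   handled above: with TARGET_FIX, "imax = (fa < fb) ? i1 : i2" does the
   DFmode comparison with cmptlt in the floating registers, moves the
   2.0/0.0 result to an integer register (ftoit), and a plain integer
   cmovne then selects i1 or i2 -- no branch is needed.  */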
2781 /* Simplify a conditional move of two constants into a setcc with
2782 arithmetic. This is done with a splitter since combine would
2783 just undo the work if done during code generation. It also catches
2784 cases we wouldn't have before cse. */
2787 alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
2788 rtx t_rtx, rtx f_rtx)
2790 HOST_WIDE_INT t, f, diff;
2791 enum machine_mode mode;
2792 rtx target, subtarget, tmp;
2794 mode = GET_MODE (dest);
2799 if (((code == NE || code == EQ) && diff < 0)
2800 || (code == GE || code == GT))
2802 code = reverse_condition (code);
2803 diff = t, t = f, f = diff;
2807 subtarget = target = dest;
2810 target = gen_lowpart (DImode, dest);
2811 if (can_create_pseudo_p ())
2812 subtarget = gen_reg_rtx (DImode);
2816 /* Below, we must be careful to use copy_rtx on target and subtarget
2817     in intermediate insns, as they may be a subreg rtx, which may not be shared.  */
2820 if (f == 0 && exact_log2 (diff) > 0
2821 /* On EV6, we've got enough shifters to make non-arithmetic shifts
2822 viable over a longer latency cmove. On EV5, the E0 slot is a
2823 scarce resource, and on EV4 shift has the same latency as a cmove. */
2824 && (diff <= 8 || alpha_tune == PROCESSOR_EV6))
2826 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2827 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2829 tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2830 GEN_INT (exact_log2 (t)));
2831 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2833 else if (f == 0 && t == -1)
2835 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2836 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2838 emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
2840 else if (diff == 1 || diff == 4 || diff == 8)
2844 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2845 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2848 emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
2851 add_op = GEN_INT (f);
2852 if (sext_add_operand (add_op, mode))
2854 tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget),
2856 tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2857 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
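/* Worked (illustrative) example for the scaled-add case above:
   "x = c ? 6 : 2", where c holds a comparison result, has t - f == 4
   and so becomes roughly

     cmpult $31,$c,$t       t = (c != 0)
     s4addq $t,2,$x         x = t*4 + 2

   rather than a conditional move; when c is itself a compare, the
   first instruction usually folds away.  */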
2869 /* Look up the X_floating library function name for the given operation.  */
2872 struct GTY(()) xfloating_op
2874 const enum rtx_code code;
2875 const char *const GTY((skip)) osf_func;
2876 const char *const GTY((skip)) vms_func;
2880 static GTY(()) struct xfloating_op xfloating_ops[] =
2882 { PLUS, "_OtsAddX", "OTS$ADD_X", 0 },
2883 { MINUS, "_OtsSubX", "OTS$SUB_X", 0 },
2884 { MULT, "_OtsMulX", "OTS$MUL_X", 0 },
2885 { DIV, "_OtsDivX", "OTS$DIV_X", 0 },
2886 { EQ, "_OtsEqlX", "OTS$EQL_X", 0 },
2887 { NE, "_OtsNeqX", "OTS$NEQ_X", 0 },
2888 { LT, "_OtsLssX", "OTS$LSS_X", 0 },
2889 { LE, "_OtsLeqX", "OTS$LEQ_X", 0 },
2890 { GT, "_OtsGtrX", "OTS$GTR_X", 0 },
2891 { GE, "_OtsGeqX", "OTS$GEQ_X", 0 },
2892 { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 },
2893 { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 },
2894 { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 },
2895 { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 },
2896 { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 }
2899 static GTY(()) struct xfloating_op vax_cvt_ops[] =
2901 { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 },
2902 { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 }
2906 alpha_lookup_xfloating_lib_func (enum rtx_code code)
2908 struct xfloating_op *ops = xfloating_ops;
2909 long n = ARRAY_SIZE (xfloating_ops);
2912 gcc_assert (TARGET_HAS_XFLOATING_LIBS);
2914 /* How irritating. Nothing to key off for the main table. */
2915 if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE))
2918 n = ARRAY_SIZE (vax_cvt_ops);
2921 for (i = 0; i < n; ++i, ++ops)
2922 if (ops->code == code)
2924 rtx func = ops->libcall;
2927 func = init_one_libfunc (TARGET_ABI_OPEN_VMS
2928 ? ops->vms_func : ops->osf_func);
2929 ops->libcall = func;
2937 /* Most X_floating operations take the rounding mode as an argument.
2938 Compute that here. */
2941 alpha_compute_xfloating_mode_arg (enum rtx_code code,
2942 enum alpha_fp_rounding_mode round)
2948 case ALPHA_FPRM_NORM:
2951 case ALPHA_FPRM_MINF:
2954 case ALPHA_FPRM_CHOP:
2957 case ALPHA_FPRM_DYN:
2963 /* XXX For reference, round to +inf is mode = 3. */
2966 if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
2972 /* Emit an X_floating library function call.
2974 Note that these functions do not follow normal calling conventions:
2975 TFmode arguments are passed in two integer registers (as opposed to
2976 indirect); TFmode return values appear in R16+R17.
2978 FUNC is the function to call.
2979 TARGET is where the output belongs.
2980 OPERANDS are the inputs.
2981 NOPERANDS is the count of inputs.
2982 EQUIV is the expression equivalent for the function.
2986 alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
2987 int noperands, rtx equiv)
2989 rtx usage = NULL_RTX, tmp, reg;
2994 for (i = 0; i < noperands; ++i)
2996 switch (GET_MODE (operands[i]))
2999 reg = gen_rtx_REG (TFmode, regno);
3004 reg = gen_rtx_REG (DFmode, regno + 32);
3009 gcc_assert (CONST_INT_P (operands[i]));
3012 reg = gen_rtx_REG (DImode, regno);
3020 emit_move_insn (reg, operands[i]);
3021 usage = alloc_EXPR_LIST (0, gen_rtx_USE (VOIDmode, reg), usage);
3024 switch (GET_MODE (target))
3027 reg = gen_rtx_REG (TFmode, 16);
3030 reg = gen_rtx_REG (DFmode, 32);
3033 reg = gen_rtx_REG (DImode, 0);
3039 tmp = gen_rtx_MEM (QImode, func);
3040 tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx,
3041 const0_rtx, const0_rtx));
3042 CALL_INSN_FUNCTION_USAGE (tmp) = usage;
3043 RTL_CONST_CALL_P (tmp) = 1;
3048 emit_libcall_block (tmp, target, reg, equiv);
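/* So, for example, a TFmode addition

     long double c = a + b;

   is never expanded inline: it becomes a call to _OtsAddX (OTS$ADD_X on
   VMS) with the operands in integer register pairs starting at $16 and
   a rounding-mode argument appended by alpha_emit_xfloating_arith
   below.  Register details here follow the calling-convention note
   above and are illustrative.  */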
3051 /* Emit an X_floating library function call for arithmetic (+,-,*,/). */
3054 alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[])
3058 rtx out_operands[3];
3060 func = alpha_lookup_xfloating_lib_func (code);
3061 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3063 out_operands[0] = operands[1];
3064 out_operands[1] = operands[2];
3065 out_operands[2] = GEN_INT (mode);
3066 alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
3067 gen_rtx_fmt_ee (code, TFmode, operands[1],
3071 /* Emit an X_floating library function call for a comparison. */
3074 alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
3076 enum rtx_code cmp_code, res_code;
3077 rtx func, out, operands[2], note;
3079   /* X_floating library comparison functions return -1 for unordered,
3083      0 for false and 1 for true.  Convert the compare against the raw return value.  */
3111 func = alpha_lookup_xfloating_lib_func (cmp_code);
3115 out = gen_reg_rtx (DImode);
3117 /* What's actually returned is -1,0,1, not a proper boolean value,
3118 so use an EXPR_LIST as with a generic libcall instead of a
3119 comparison type expression. */
3120 note = gen_rtx_EXPR_LIST (VOIDmode, op1, NULL_RTX);
3121 note = gen_rtx_EXPR_LIST (VOIDmode, op0, note);
3122 note = gen_rtx_EXPR_LIST (VOIDmode, func, note);
3123 alpha_emit_xfloating_libcall (func, out, operands, 2, note);
3128 /* Emit an X_floating library function call for a conversion. */
3131 alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[])
3133 int noperands = 1, mode;
3134 rtx out_operands[2];
3136 enum rtx_code code = orig_code;
3138 if (code == UNSIGNED_FIX)
3141 func = alpha_lookup_xfloating_lib_func (code);
3143 out_operands[0] = operands[1];
3148 mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
3149 out_operands[1] = GEN_INT (mode);
3152 case FLOAT_TRUNCATE:
3153 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3154 out_operands[1] = GEN_INT (mode);
3161 alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
3162 gen_rtx_fmt_e (orig_code,
3163 GET_MODE (operands[0]),
3167 /* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of
3168 DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true,
3169    guarantee that the two-insn sequence "OP[0] = OP[2]; OP[1] = OP[3]"
3172    is valid.  Naturally, output operand ordering is little-endian.
3173 This is used by *movtf_internal and *movti_internal. */
3176 alpha_split_tmode_pair (rtx operands[4], enum machine_mode mode,
3179 switch (GET_CODE (operands[1]))
3182 operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
3183 operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
3187 operands[3] = adjust_address (operands[1], DImode, 8);
3188 operands[2] = adjust_address (operands[1], DImode, 0);
3193 gcc_assert (operands[1] == CONST0_RTX (mode));
3194 operands[2] = operands[3] = const0_rtx;
3201 switch (GET_CODE (operands[0]))
3204 operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
3205 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
3209 operands[1] = adjust_address (operands[0], DImode, 8);
3210 operands[0] = adjust_address (operands[0], DImode, 0);
3217 if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3]))
3220 tmp = operands[0], operands[0] = operands[1], operands[1] = tmp;
3221 tmp = operands[2], operands[2] = operands[3], operands[3] = tmp;
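/* Overlap example: splitting a register-to-register TImode move whose
   destination pair is $2:$3 and whose source pair is $1:$2 must not
   move the low halves first, since "$2 = $1" would clobber the $2 that
   is still needed as the high source; the swap above makes the high
   halves move first.  Register numbers are illustrative.  */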
3225 /* Implement negtf2 or abstf2. Op0 is destination, op1 is source,
3226 op2 is a register containing the sign bit, operation is the
3227 logical operation to be performed. */
3230 alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx))
3232 rtx high_bit = operands[2];
3236 alpha_split_tmode_pair (operands, TFmode, false);
3238 /* Detect three flavors of operand overlap. */
3240 if (rtx_equal_p (operands[0], operands[2]))
3242 else if (rtx_equal_p (operands[1], operands[2]))
3244 if (rtx_equal_p (operands[0], high_bit))
3251 emit_move_insn (operands[0], operands[2]);
3253 /* ??? If the destination overlaps both source tf and high_bit, then
3254 assume source tf is dead in its entirety and use the other half
3255 for a scratch register. Otherwise "scratch" is just the proper
3256 destination register. */
3257 scratch = operands[move < 2 ? 1 : 3];
3259 emit_insn ((*operation) (scratch, high_bit, operands[3]));
3263 emit_move_insn (operands[0], operands[2]);
3265 emit_move_insn (operands[1], scratch);
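/* In scalar terms, negtf2 built with this helper just flips the sign
   bit of the high quadword of the TFmode value, and abstf2 clears it;
   roughly (illustrative C):

     res_lo = src_lo;
     res_hi = src_hi ^ 0x8000000000000000UL;    negation
     res_hi = src_hi & ~0x8000000000000000UL;   absolute value

   operands[2] supplies that sign-bit mask in a register, so only the
   logical operation applied to the high half differs.  */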
3269 /* Use ext[wlq][lh] as the Architecture Handbook describes for extracting unaligned data:
3273 word: ldq_u r1,X(r11) ldq_u r1,X(r11)
3274 ldq_u r2,X+1(r11) ldq_u r2,X+1(r11)
3275 lda r3,X(r11) lda r3,X+2(r11)
3276 extwl r1,r3,r1 extql r1,r3,r1
3277 extwh r2,r3,r2 extqh r2,r3,r2
3278	     or r1,r2,r1		or r1,r2,r1
3281 long: ldq_u r1,X(r11) ldq_u r1,X(r11)
3282 ldq_u r2,X+3(r11) ldq_u r2,X+3(r11)
3283 lda r3,X(r11) lda r3,X(r11)
3284 extll r1,r3,r1 extll r1,r3,r1
3285 extlh r2,r3,r2 extlh r2,r3,r2
3286	     or r1,r2,r1		addl r1,r2,r1
3288 quad: ldq_u r1,X(r11)
3297 alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
3298 HOST_WIDE_INT ofs, int sign)
3300 rtx meml, memh, addr, extl, exth, tmp, mema;
3301 enum machine_mode mode;
3303 if (TARGET_BWX && size == 2)
3305 meml = adjust_address (mem, QImode, ofs);
3306 memh = adjust_address (mem, QImode, ofs+1);
3307 if (BYTES_BIG_ENDIAN)
3308 tmp = meml, meml = memh, memh = tmp;
3309 extl = gen_reg_rtx (DImode);
3310 exth = gen_reg_rtx (DImode);
3311 emit_insn (gen_zero_extendqidi2 (extl, meml));
3312 emit_insn (gen_zero_extendqidi2 (exth, memh));
3313 exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
3314 NULL, 1, OPTAB_LIB_WIDEN);
3315 addr = expand_simple_binop (DImode, IOR, extl, exth,
3316 NULL, 1, OPTAB_LIB_WIDEN);
3318 if (sign && GET_MODE (tgt) != HImode)
3320 addr = gen_lowpart (HImode, addr);
3321 emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
3325 if (GET_MODE (tgt) != DImode)
3326 addr = gen_lowpart (GET_MODE (tgt), addr);
3327 emit_move_insn (tgt, addr);
3332 meml = gen_reg_rtx (DImode);
3333 memh = gen_reg_rtx (DImode);
3334 addr = gen_reg_rtx (DImode);
3335 extl = gen_reg_rtx (DImode);
3336 exth = gen_reg_rtx (DImode);
3338 mema = XEXP (mem, 0);
3339 if (GET_CODE (mema) == LO_SUM)
3340 mema = force_reg (Pmode, mema);
3342 /* AND addresses cannot be in any alias set, since they may implicitly
3343 alias surrounding code. Ideally we'd have some alias set that
3344 covered all types except those with alignment 8 or higher. */
3346 tmp = change_address (mem, DImode,
3347 gen_rtx_AND (DImode,
3348 plus_constant (mema, ofs),
3350 set_mem_alias_set (tmp, 0);
3351 emit_move_insn (meml, tmp);
3353 tmp = change_address (mem, DImode,
3354 gen_rtx_AND (DImode,
3355 plus_constant (mema, ofs + size - 1),
3357 set_mem_alias_set (tmp, 0);
3358 emit_move_insn (memh, tmp);
3360 if (WORDS_BIG_ENDIAN && sign && (size == 2 || size == 4))
3362 emit_move_insn (addr, plus_constant (mema, -1));
3364 emit_insn (gen_extqh_be (extl, meml, addr));
3365 emit_insn (gen_extxl_be (exth, memh, GEN_INT (64), addr));
3367 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3368 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (64 - size*8),
3369 addr, 1, OPTAB_WIDEN);
3371 else if (sign && size == 2)
3373 emit_move_insn (addr, plus_constant (mema, ofs+2));
3375 emit_insn (gen_extxl_le (extl, meml, GEN_INT (64), addr));
3376 emit_insn (gen_extqh_le (exth, memh, addr));
3378      /* We must use tgt here for the target.  The alpha-vms port fails if we use
3379 addr for the target, because addr is marked as a pointer and combine
3380 knows that pointers are always sign-extended 32-bit values. */
3381 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3382 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
3383 addr, 1, OPTAB_WIDEN);
3387 if (WORDS_BIG_ENDIAN)
3389 emit_move_insn (addr, plus_constant (mema, ofs+size-1));
3393 emit_insn (gen_extwh_be (extl, meml, addr));
3398 emit_insn (gen_extlh_be (extl, meml, addr));
3403 emit_insn (gen_extqh_be (extl, meml, addr));
3410 emit_insn (gen_extxl_be (exth, memh, GEN_INT (size*8), addr));
3414 emit_move_insn (addr, plus_constant (mema, ofs));
3415 emit_insn (gen_extxl_le (extl, meml, GEN_INT (size*8), addr));
3419 emit_insn (gen_extwh_le (exth, memh, addr));
3424 emit_insn (gen_extlh_le (exth, memh, addr));
3429 emit_insn (gen_extqh_le (exth, memh, addr));
3438 addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
3439 gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
3444 emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr));
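/* The non-BWX path above is, in rough C terms (illustrative only; the
   two aligned quadword loads correspond to ldq_u, the shifts to the
   extXl/extXh pair selected by the low address bits):

     uint64_t lo = *(uint64_t *) (addr & ~7UL);
     uint64_t hi = *(uint64_t *) ((addr + size - 1) & ~7UL);
     unsigned sh = (addr & 7) * 8;
     uint64_t val = lo >> sh;
     if (sh)
       val |= hi << (64 - sh);

   VAL is then truncated, or sign-extended for the signed cases, to the
   requested width.  */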
3447 /* Similarly, use ins and msk instructions to perform unaligned stores. */
3450 alpha_expand_unaligned_store (rtx dst, rtx src,
3451 HOST_WIDE_INT size, HOST_WIDE_INT ofs)
3453 rtx dstl, dsth, addr, insl, insh, meml, memh, dsta;
3455 if (TARGET_BWX && size == 2)
3457 if (src != const0_rtx)
3459 dstl = gen_lowpart (QImode, src);
3460 dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8),
3461 NULL, 1, OPTAB_LIB_WIDEN);
3462 dsth = gen_lowpart (QImode, dsth);
3465 dstl = dsth = const0_rtx;