/* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "tm-constrs.h"
/* Builtin types, data and prototypes. */

enum spu_builtin_type_index
  SPU_BTI_END_OF_PARAMS,

  /* We create new type nodes for these. */
  /* A 16-byte type.  (Implemented with V16QI_type_node) */
  /* These all correspond to intSI_type_node */
  /* These correspond to the standard types */

#define V16QI_type_node           (spu_builtin_types[SPU_BTI_V16QI])
#define V8HI_type_node            (spu_builtin_types[SPU_BTI_V8HI])
#define V4SI_type_node            (spu_builtin_types[SPU_BTI_V4SI])
#define V2DI_type_node            (spu_builtin_types[SPU_BTI_V2DI])
#define V4SF_type_node            (spu_builtin_types[SPU_BTI_V4SF])
#define V2DF_type_node            (spu_builtin_types[SPU_BTI_V2DF])
#define unsigned_V16QI_type_node  (spu_builtin_types[SPU_BTI_UV16QI])
#define unsigned_V8HI_type_node   (spu_builtin_types[SPU_BTI_UV8HI])
#define unsigned_V4SI_type_node   (spu_builtin_types[SPU_BTI_UV4SI])
#define unsigned_V2DI_type_node   (spu_builtin_types[SPU_BTI_UV2DI])

static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
struct spu_builtin_range

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},            /* SPU_BTI_7 */
  {-0x40ll, 0x3fll},            /* SPU_BTI_S7 */
  {0ll, 0x7fll},                /* SPU_BTI_U7 */
  {-0x200ll, 0x1ffll},          /* SPU_BTI_S10 */
  {-0x2000ll, 0x1fffll},        /* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},              /* SPU_BTI_U14 */
  {-0x8000ll, 0xffffll},        /* SPU_BTI_16 */
  {-0x8000ll, 0x7fffll},        /* SPU_BTI_S16 */
  {-0x20000ll, 0x1ffffll},      /* SPU_BTI_S16_2 */
  {0ll, 0xffffll},              /* SPU_BTI_U16 */
  {0ll, 0x3ffffll},             /* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},             /* SPU_BTI_U18 */
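/* A minimal sketch of how the bounds above are meant to be consumed
   when validating a builtin's immediate operand (hypothetical helper,
   not part of the original file; it assumes the elided struct
   spu_builtin_range carries signed `low' and `high' members matching
   the initializers above).  */
static int
spu_value_in_range_p (HOST_WIDE_INT val, int range_index)
{
  /* E.g. for SPU_BTI_S10 this reduces to -0x200 <= val && val <= 0x1ff.  */
  return val >= spu_builtin_range[range_index].low
         && val <= spu_builtin_range[range_index].high;
}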
/* Target specific attribute specifications. */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];

/* Prototypes and external defs. */
static void spu_init_builtins (void);
static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
static rtx get_pic_reg (void);
static int need_to_save_reg (int regno, int saving);
static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
static void emit_nop_for_insn (rtx insn);
static bool insn_clobbers_hbr (rtx insn);
static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
                                  int distance, sbitmap blocks);
static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
                                    enum machine_mode dmode);
static rtx get_branch_target (rtx branch);
static void spu_machine_dependent_reorg (void);
static int spu_sched_issue_rate (void);
static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
static int get_pipe (rtx insn);
static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
static void spu_sched_init_global (FILE *, int, int);
static void spu_sched_init (FILE *, int, int);
static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
                                         unsigned char *no_add_attrs);
static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
                                         unsigned char *no_add_attrs);
static int spu_naked_function_p (tree func);
static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                                            const_tree type, unsigned char named);
static tree spu_build_builtin_va_list (void);
static void spu_va_start (tree, rtx);
static tree spu_gimplify_va_arg_expr (tree valist, tree type,
                                      gimple_seq * pre_p, gimple_seq * post_p);
static int regno_aligned_for_load (int regno);
static int store_with_one_insn_p (rtx mem);
static int mem_is_padded_component_ref (rtx x);
static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
static void spu_asm_globalize_label (FILE * file, const char *name);
static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
                                    int *total, bool speed);
static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
static void spu_init_libfuncs (void);
static bool spu_return_in_memory (const_tree type, const_tree fntype);
static void fix_range (const char *);
static void spu_encode_section_info (tree, rtx, int);
static tree spu_builtin_mul_widen_even (tree);
static tree spu_builtin_mul_widen_odd (tree);
static tree spu_builtin_mask_for_load (void);
static int spu_builtin_vectorization_cost (bool);
static bool spu_vector_alignment_reachable (const_tree, bool);
static tree spu_builtin_vec_perm (tree, tree *);
static int spu_sms_res_mii (struct ddg *g);
static void asm_file_start (void);
static unsigned int spu_section_type_flags (tree, const char *, int);

extern const char *reg_names[];
rtx spu_compare_op0, spu_compare_op1;

/* Which instruction set architecture to use. */

/* Which cpu are we tuning for. */
/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint, and then the compiler
   will insert enough nops to make it at least 8 insns.  The default is
   for the compiler to allow up to 2 nops to be emitted.  The nops are
   inserted in pairs, so we round down. */
int spu_hint_dist = (8*4) - (2*4);
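/* Worked example of the default above: the hint must lead the branch
   by 8 insns = 8 * 4 = 32 bytes; budgeting 2 nops = 2 * 4 = 8 bytes of
   padding leaves (8*4) - (2*4) = 24 bytes of real instructions that
   must be visible before a hint is inserted.  spu_override_options
   recomputes this below as 8*4 - spu_max_nops*4.  */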
/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int spu_flag_var_tracking;
  IC_POOL,                      /* constant pool */
  IC_IL1,                       /* one il* instruction */
  IC_IL2,                       /* both ilhu and iohl instructions */
  IC_IL1s,                      /* one il* instruction */
  IC_IL2s,                      /* both ilhu and iohl instructions */
  IC_FSMBI,                     /* the fsmbi instruction */
  IC_CPAT,                      /* one of the c*d instructions */
  IC_FSMBI2                     /* fsmbi plus 1 other instruction */
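/* Illustrative reading of the classes above (my interpretation of the
   immediate forms; the actual classifier is classify_immediate,
   declared below): an SImode 0x1234 is IC_IL1 because it fits il's
   sign-extended 16-bit field; 0x12345678 is IC_IL2 (ilhu 0x1234 then
   iohl 0x5678); a constant whose bytes are each 0x00 or 0xff is
   IC_FSMBI; and the insertion-control patterns produced by the
   cbd/chd/cwd/cdd instructions are IC_CPAT.  */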
static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info (unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,
                                                enum machine_mode mode);

static enum machine_mode spu_unwind_word_mode (void);

static enum machine_mode
spu_libgcc_cmp_return_mode (void);

static enum machine_mode
spu_libgcc_shift_count_mode (void);
/*  TARGET overrides.  */

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN spu_expand_builtin

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
/* The .8byte directive doesn't seem to work well for a 32 bit
   architecture. */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP NULL

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT spu_sched_init

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER spu_sched_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 spu_sched_reorder

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost

const struct attribute_spec spu_attribute_table[];
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table

#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER spu_assemble_integer

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall

#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost

#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable

#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode

#undef TARGET_SCHED_SMS_RES_MII
#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START asm_file_start

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
struct gcc_target targetm = TARGET_INITIALIZER;

spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
  /* Override some of the default param values.  With so many registers
     larger values are better for these params.  */
  MAX_PENDING_LIST_LENGTH = 128;

  /* With so many registers this is better on by default. */
  flag_rename_registers = 1;

/* Sometimes certain combinations of command options do not make sense
   on a particular target machine.  You can define a macro
   OVERRIDE_OPTIONS to take account of this.  This macro, if defined, is
   executed once just after all the command options have been parsed.  */
spu_override_options (void)
  /* Small loops will be unpeeled at -O3.  For SPU it is more important
     to keep code small by default.  */
  if (!flag_unroll_loops && !flag_peel_loops
      && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
    PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;

  flag_omit_frame_pointer = 1;

  /* Functions must be 8 byte aligned so we correctly handle dual issue */
  if (align_functions < 8)

  spu_hint_dist = 8*4 - spu_max_nops*4;
  if (spu_hint_dist < 0)

  if (spu_fixed_range_string)
    fix_range (spu_fixed_range_string);
  /* Determine processor architectural level.  */
  if (strcmp (&spu_arch_string[0], "cell") == 0)
    spu_arch = PROCESSOR_CELL;
  else if (strcmp (&spu_arch_string[0], "celledp") == 0)
    spu_arch = PROCESSOR_CELLEDP;
    error ("Unknown architecture '%s'", &spu_arch_string[0]);

  /* Determine processor to tune for.  */
  if (strcmp (&spu_tune_string[0], "cell") == 0)
    spu_tune = PROCESSOR_CELL;
  else if (strcmp (&spu_tune_string[0], "celledp") == 0)
    spu_tune = PROCESSOR_CELLEDP;
    error ("Unknown architecture '%s'", &spu_tune_string[0]);

  /* Change defaults according to the processor architecture.  */
  if (spu_arch == PROCESSOR_CELLEDP)
      /* If no command line option has been otherwise specified, change
         the default to -mno-safe-hints on celledp -- only the original
         Cell/B.E. processors require this workaround.  */
      if (!(target_flags_explicit & MASK_SAFE_HINTS))
        target_flags &= ~MASK_SAFE_HINTS;
  REAL_MODE_FORMAT (SFmode) = &spu_single_format;

/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler.  */

/* Table of machine attributes.  */
const struct attribute_spec spu_attribute_table[] =
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "naked",      0, 0, true,  false, false, spu_handle_fndecl_attribute },
  { "spu_vector", 0, 0, false, true,  false, spu_handle_vector_attribute },
  { NULL,         0, 0, false, false, false, NULL }
/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported.  */
spu_scalar_mode_supported_p (enum machine_mode mode)

/* Similarly for vector modes.  "Supported" here is less strict.  At
   least some operations are supported; need to check optabs or builtins
   for further details.  */
spu_vector_mode_supported_p (enum machine_mode mode)
/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
   least significant bytes of the outer mode.  This function returns
   TRUE for the SUBREG's where this is correct.  */
valid_subreg (rtx op)
  enum machine_mode om = GET_MODE (op);
  enum machine_mode im = GET_MODE (SUBREG_REG (op));
  return om != VOIDmode && im != VOIDmode
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
        || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
        || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
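/* Examples of the predicate above: an SImode SUBREG of a QImode reg is
   valid (both sizes are at most 4 bytes, so both live in the preferred
   slot), and a TImode SUBREG of V4SImode is valid (both are 16 bytes),
   but a paradoxical DImode SUBREG of an SImode reg is rejected because
   the 4-byte inner value does not occupy the least significant bytes
   of the 8-byte outer mode on this target.  */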
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset. */
adjust_operand (rtx op, HOST_WIDE_INT * start)
546 adjust_operand (rtx op, HOST_WIDE_INT * start)
548 enum machine_mode mode;
550 /* Strip any paradoxical SUBREG. */
551 if (GET_CODE (op) == SUBREG
552 && (GET_MODE_BITSIZE (GET_MODE (op))
553 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
557 GET_MODE_BITSIZE (GET_MODE (op)) -
558 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
559 op = SUBREG_REG (op);
  /* If it is smaller than SI, ensure a SUBREG.  */
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
      *start += 32 - op_size;

  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG.  */
  mode = mode_for_size (op_size, MODE_INT, 0);
  if (mode != GET_MODE (op))
    op = gen_rtx_SUBREG (mode, op, 0);
spu_expand_extv (rtx ops[], int unsignedp)
  HOST_WIDE_INT width = INTVAL (ops[2]);
  HOST_WIDE_INT start = INTVAL (ops[3]);
  HOST_WIDE_INT src_size, dst_size;
  enum machine_mode src_mode, dst_mode;
  rtx dst = ops[0], src = ops[1];

  dst = adjust_operand (ops[0], 0);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  src = adjust_operand (src, &start);
  src_mode = GET_MODE (src);
  src_size = GET_MODE_BITSIZE (GET_MODE (src));

      s = gen_reg_rtx (src_mode);
          emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
          emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
          emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));

  if (width < src_size)
          icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
          icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
          icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
      s = gen_reg_rtx (src_mode);
      pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));

  convert_move (dst, src, unsignedp);
spu_expand_insv (rtx ops[])
  HOST_WIDE_INT width = INTVAL (ops[1]);
  HOST_WIDE_INT start = INTVAL (ops[2]);
  HOST_WIDE_INT maskbits;
  enum machine_mode dst_mode, src_mode;
  rtx dst = ops[0], src = ops[3];
  int dst_size, src_size;

  if (GET_CODE (ops[0]) == MEM)
    dst = gen_reg_rtx (TImode);
  dst = adjust_operand (dst, &start);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  if (CONSTANT_P (src))
      enum machine_mode m =
        (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
      src = force_reg (m, convert_to_mode (m, src, 0));
  src = adjust_operand (src, 0);
  src_mode = GET_MODE (src);
  src_size = GET_MODE_BITSIZE (GET_MODE (src));

  mask = gen_reg_rtx (dst_mode);
  shift_reg = gen_reg_rtx (dst_mode);
  shift = dst_size - start - width;

  /* It's not safe to use subreg here because the compiler assumes
     that the SUBREG_REG is right justified in the SUBREG. */
  convert_move (shift_reg, src, 1);
          emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
          emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
          emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));

      maskbits = (-1ll << (32 - width - start));
        maskbits += (1ll << (32 - start));
      emit_move_insn (mask, GEN_INT (maskbits));

      maskbits = (-1ll << (64 - width - start));
        maskbits += (1ll << (64 - start));
      emit_move_insn (mask, GEN_INT (maskbits));

        unsigned char arr[16];
        memset (arr, 0, sizeof (arr));
        arr[i] = 0xff >> (start & 7);
        for (i++; i <= (start + width - 1) / 8; i++)
        arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
        emit_move_insn (mask, array_to_constant (TImode, arr));

  if (GET_CODE (ops[0]) == MEM)
      rtx aligned = gen_reg_rtx (SImode);
      rtx low = gen_reg_rtx (SImode);
      rtx addr = gen_reg_rtx (SImode);
      rtx rotl = gen_reg_rtx (SImode);
      rtx mask0 = gen_reg_rtx (TImode);

      emit_move_insn (addr, XEXP (ops[0], 0));
      emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
      emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
      emit_insn (gen_negsi2 (rotl, low));
      emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
      emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
      mem = change_address (ops[0], TImode, aligned);
      set_mem_alias_set (mem, 0);
      emit_move_insn (dst, mem);
      emit_insn (gen_selb (dst, dst, shift_reg, mask0));
      emit_move_insn (mem, dst);
      if (start + width > MEM_ALIGN (ops[0]))
          rtx shl = gen_reg_rtx (SImode);
          rtx mask1 = gen_reg_rtx (TImode);
          rtx dst1 = gen_reg_rtx (TImode);

          emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
          emit_insn (gen_shlqby_ti (mask1, mask, shl));
          mem1 = adjust_address (mem, TImode, 16);
          set_mem_alias_set (mem1, 0);
          emit_move_insn (dst1, mem1);
          emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
          emit_move_insn (mem1, dst1);
    emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
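/* Worked example of the SImode mask computation above: inserting
   width = 8 bits at start = 8 (bit 0 being the MSB) gives
       maskbits  = -1ll << (32 - 8 - 8)   ->  ...ffff0000
       maskbits += 1ll << (32 - 8)        ->  0x00ff0000
   i.e. exactly the bit lane being replaced, which selb then uses to
   merge the shifted source into the destination.  */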
spu_expand_block_move (rtx ops[])
  HOST_WIDE_INT bytes, align, offset;
  rtx src, dst, sreg, dreg, target;

  if (GET_CODE (ops[2]) != CONST_INT
      || GET_CODE (ops[3]) != CONST_INT
      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))

  bytes = INTVAL (ops[2]);
  align = INTVAL (ops[3]);

  for (offset = 0; offset + 16 <= bytes; offset += 16)
      dst = adjust_address (ops[0], V16QImode, offset);
      src = adjust_address (ops[1], V16QImode, offset);
      emit_move_insn (dst, src);

      unsigned char arr[16] = { 0 };
      for (i = 0; i < bytes - offset; i++)
        arr[i] = 0xff;
      dst = adjust_address (ops[0], V16QImode, offset);
      src = adjust_address (ops[1], V16QImode, offset);
      mask = gen_reg_rtx (V16QImode);
      sreg = gen_reg_rtx (V16QImode);
      dreg = gen_reg_rtx (V16QImode);
      target = gen_reg_rtx (V16QImode);
      emit_move_insn (mask, array_to_constant (V16QImode, arr));
      emit_move_insn (dreg, dst);
      emit_move_insn (sreg, src);
      emit_insn (gen_selb (target, dreg, sreg, mask));
      emit_move_insn (dst, target);
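/* A small worked example of the tail handling above: with bytes = 21
   and align = 16, the quadword loop copies offsets 0..15 and leaves 5
   bytes.  arr[] then holds 0xff in its first 5 entries and 0x00 in
   the rest, so selb takes bytes 0-4 from the source qword and bytes
   5-15 from the original destination qword.  */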
  { SPU_EQ, SPU_GT, SPU_GTU };

int spu_comp_icode[12][3] = {
  {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
  {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
  {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
  {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
  {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
  {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
  {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
  {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
  {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
  {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
  {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
  {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.   GCC can figure this out too if we don't
   provide all variations of compares, but GCC always wants to use
   WORD_MODE, so we can generate better code in most cases if we do it
   ourselves.  */
spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
  int reverse_compare = 0;
  int reverse_test = 0;
  rtx compare_result, eq_result;
  rtx comp_rtx, eq_rtx;
  rtx target = operands[0];
  enum machine_mode comp_mode;
  enum machine_mode op_mode;
  enum spu_comp_code scode, eq_code;
  enum insn_code ior_code;
  /* When spu_compare_op1 is a CONST_INT, change (X >= C) to (X > C-1),
     and so on, to keep the constant in operand 1. */
  if (GET_CODE (spu_compare_op1) == CONST_INT)
      HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
      if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
            spu_compare_op1 = GEN_INT (val);
            spu_compare_op1 = GEN_INT (val);
            spu_compare_op1 = GEN_INT (val);
            spu_compare_op1 = GEN_INT (val);
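  /* Concrete instance of the transformation above: with val = C - 1,
     (X >= 5) becomes (X > 4) and (X < 5) becomes (X <= 4), and
     likewise for the unsigned codes, so the constant stays in
     operand 1 and the cgt/clgt patterns apply directly.  */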
  op_mode = GET_MODE (spu_compare_op0);
      if (HONOR_NANS (op_mode))
      if (HONOR_NANS (op_mode))
      comp_mode = V4SImode;
      comp_mode = V2DImode;

  if (GET_MODE (spu_compare_op1) == DFmode
      && (scode != SPU_GT && scode != SPU_EQ))

  if (is_set == 0 && spu_compare_op1 == const0_rtx
      && (GET_MODE (spu_compare_op0) == SImode
          || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
      /* Don't need to set a register with the result when we are
         comparing against zero and branching. */
      reverse_test = !reverse_test;
      compare_result = spu_compare_op0;
      compare_result = gen_reg_rtx (comp_mode);

      if (reverse_compare)
          rtx t = spu_compare_op1;
          spu_compare_op1 = spu_compare_op0;
          spu_compare_op0 = t;

      if (spu_comp_icode[index][scode] == 0)

      if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
          (spu_compare_op0, op_mode))
        spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
          (spu_compare_op1, op_mode))
        spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
      comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
      emit_insn (comp_rtx);

          eq_result = gen_reg_rtx (comp_mode);
          eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
          ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
          gcc_assert (ior_code != CODE_FOR_nothing);
          emit_insn (GEN_FCN (ior_code)
                     (compare_result, compare_result, eq_result));

      /* We don't have branch on QI compare insns, so we convert the
         QI compare result to a HI result. */
      if (comp_mode == QImode)
          rtx old_res = compare_result;
          compare_result = gen_reg_rtx (HImode);
          emit_insn (gen_extendqihi2 (compare_result, old_res));

        bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
        bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);

      loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
                                   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
  else if (is_set == 2)
      int compare_size = GET_MODE_BITSIZE (comp_mode);
      int target_size = GET_MODE_BITSIZE (GET_MODE (target));
      enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
      rtx op_t = operands[2];
      rtx op_f = operands[3];

      /* The result of the comparison can be SI, HI or QI mode.  Create a
         mask based on that result. */
      if (target_size > compare_size)
          select_mask = gen_reg_rtx (mode);
          emit_insn (gen_extend_compare (select_mask, compare_result));
      else if (target_size < compare_size)
          gen_rtx_SUBREG (mode, compare_result,
                          (compare_size - target_size) / BITS_PER_UNIT);
      else if (comp_mode != mode)
        select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
        select_mask = compare_result;

      if (GET_MODE (target) != GET_MODE (op_t)
          || GET_MODE (target) != GET_MODE (op_f))

        emit_insn (gen_selb (target, op_t, op_f, select_mask));
        emit_insn (gen_selb (target, op_f, op_t, select_mask));

      emit_insn (gen_rtx_SET (VOIDmode, compare_result,
                              gen_rtx_NOT (comp_mode, compare_result)));
      if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
        emit_insn (gen_extendhisi2 (target, compare_result));
      else if (GET_MODE (target) == SImode
               && GET_MODE (compare_result) == QImode)
        emit_insn (gen_extend_compare (target, compare_result));
        emit_move_insn (target, compare_result);
const_double_to_hwint (rtx x)
  if (GET_MODE (x) == SFmode)
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_SINGLE (rv, val);
  else if (GET_MODE (x) == DFmode)
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
      val = (val << 32) | (l[1] & 0xffffffff);

hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
  gcc_assert (mode == SFmode || mode == DFmode);

    tv[0] = (v << 32) >> 32;
  else if (mode == DFmode)
      tv[1] = (v << 32) >> 32;
  real_from_target (&rv, tv, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
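/* Example of the round trip above: for SFmode,
   const_double_to_hwint returns the target single-precision bit
   pattern (1.0f -> 0x3f800000), and hwint_to_const_double (SFmode,
   0x3f800000) rebuilds the CONST_DOUBLE for 1.0.  For DFmode the two
   32-bit target words are packed into one 64-bit HOST_WIDE_INT, high
   word first.  */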
print_operand_address (FILE * file, register rtx addr)
  if (GET_CODE (addr) == AND
      && GET_CODE (XEXP (addr, 1)) == CONST_INT
      && INTVAL (XEXP (addr, 1)) == -16)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
      fprintf (file, "0(%s)", reg_names[REGNO (addr)]);

      reg = XEXP (addr, 0);
      offset = XEXP (addr, 1);
      if (GET_CODE (offset) == REG)
          fprintf (file, "%s,%s", reg_names[REGNO (reg)],
                   reg_names[REGNO (offset)]);
      else if (GET_CODE (offset) == CONST_INT)
          fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
                   INTVAL (offset), reg_names[REGNO (reg)]);

      output_addr_const (file, addr);
print_operand (FILE * file, rtx x, int code)
  enum machine_mode mode = GET_MODE (x);
  unsigned char arr[16];
  int xcode = GET_CODE (x);

  if (GET_MODE (x) == VOIDmode)
      case 'L':                 /* 128 bits, signed */
      case 'm':                 /* 128 bits, signed */
      case 'T':                 /* 128 bits, signed */
      case 't':                 /* 128 bits, signed */
      case 'K':                 /* 64 bits, signed */
      case 'k':                 /* 64 bits, signed */
      case 'D':                 /* 64 bits, signed */
      case 'd':                 /* 64 bits, signed */
      case 'J':                 /* 32 bits, signed */
      case 'j':                 /* 32 bits, signed */
      case 's':                 /* 32 bits, signed */
      case 'S':                 /* 32 bits, signed */

    case 'j':                   /* 32 bits, signed */
    case 'k':                   /* 64 bits, signed */
    case 'm':                   /* 128 bits, signed */
      if (xcode == CONST_INT
          || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
          gcc_assert (logical_immediate_p (x, mode));
          constant_to_array (mode, x, arr);
          val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
          val = trunc_int_for_mode (val, SImode);
          switch (which_logical_immediate (val))
              fprintf (file, "h");
              fprintf (file, "b");

    case 'J':                   /* 32 bits, signed */
    case 'K':                   /* 64 bits, signed */
    case 'L':                   /* 128 bits, signed */
      if (xcode == CONST_INT
          || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
          gcc_assert (logical_immediate_p (x, mode)
                      || iohl_immediate_p (x, mode));
          constant_to_array (mode, x, arr);
          val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
          val = trunc_int_for_mode (val, SImode);
          switch (which_logical_immediate (val))
              val = trunc_int_for_mode (val, HImode);
              val = trunc_int_for_mode (val, QImode);
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);

    case 't':                   /* 128 bits, signed */
    case 'd':                   /* 64 bits, signed */
    case 's':                   /* 32 bits, signed */
        enum immediate_class c = classify_immediate (x, mode);
            constant_to_array (mode, x, arr);
            val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
            val = trunc_int_for_mode (val, SImode);
            switch (which_immediate_load (val))
                fprintf (file, "a");
                fprintf (file, "h");
                fprintf (file, "hu");

            constant_to_array (mode, x, arr);
            cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
              fprintf (file, "b");
              fprintf (file, "h");
              fprintf (file, "w");
              fprintf (file, "d");

            if (xcode == CONST_VECTOR)
                x = CONST_VECTOR_ELT (x, 0);
                xcode = GET_CODE (x);
            if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
              fprintf (file, "a");
            else if (xcode == HIGH)
              fprintf (file, "hu");

    case 'T':                   /* 128 bits, signed */
    case 'D':                   /* 64 bits, signed */
    case 'S':                   /* 32 bits, signed */
        enum immediate_class c = classify_immediate (x, mode);
            constant_to_array (mode, x, arr);
            val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
            val = trunc_int_for_mode (val, SImode);
            switch (which_immediate_load (val))
                val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);

            constant_to_array (mode, x, arr);
            for (i = 0; i < 16; i++)
            print_operand (file, GEN_INT (val), 0);

            constant_to_array (mode, x, arr);
            cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);

      if (GET_CODE (x) == CONST_VECTOR)
        x = CONST_VECTOR_ELT (x, 0);
      output_addr_const (file, x);
        fprintf (file, "@h");
      if (xcode == CONST_INT)
          /* Only the 4 least significant bits are relevant for the
             generate-controls (c*d) instructions.  */
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
    case 'M':                   /* print code for c*d */
      if (GET_CODE (x) == CONST_INT)
            fprintf (file, "b");
            fprintf (file, "h");
            fprintf (file, "w");
            fprintf (file, "d");

    case 'N':                   /* Negate the operand */
      if (xcode == CONST_INT)
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
      else if (xcode == CONST_VECTOR)
        fprintf (file, HOST_WIDE_INT_PRINT_DEC,
                 -INTVAL (CONST_VECTOR_ELT (x, 0)));

    case 'I':                   /* enable/disable interrupts */
      if (xcode == CONST_INT)
        fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");

    case 'b':                   /* branch modifiers */
        fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
      else if (COMPARISON_P (x))
        fprintf (file, "%s", xcode == NE ? "n" : "");

    case 'i':                   /* indirect call */
          if (GET_CODE (XEXP (x, 0)) == REG)
            /* Used in indirect function calls. */
            fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
            output_address (XEXP (x, 0));

    case 'p':                   /* load/store */
          xcode = GET_CODE (x);
          xcode = GET_CODE (x);
        fprintf (file, "d");
      else if (xcode == CONST_INT)
        fprintf (file, "a");
      else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
        fprintf (file, "r");
      else if (xcode == PLUS || xcode == LO_SUM)
          if (GET_CODE (XEXP (x, 1)) == REG)
            fprintf (file, "x");
            fprintf (file, "d");

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -(val & -8ll);
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));

        fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (xcode == MEM)
        output_address (XEXP (x, 0));
      else if (xcode == CONST_VECTOR)
        print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
        output_addr_const (file, x);

      output_operand_lossage ("invalid %%xn code");
extern char call_used_regs[];

/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
   caller saved register.  For leaf functions it is more efficient to
   use a volatile register because we won't need to save and restore the
   pic register.  This routine is only valid after register allocation
   is completed, so we can pick an unused register. */
  rtx pic_reg = pic_offset_table_rtx;
  if (!reload_completed && !reload_in_progress)
/* Split constant addresses to handle cases that are too large.
   Add in the pic register when in PIC mode.
   Split immediates that require more than 1 instruction. */
spu_split_immediate (rtx * ops)
  enum machine_mode mode = GET_MODE (ops[0]);
  enum immediate_class c = classify_immediate (ops[1], mode);

        unsigned char arrhi[16];
        unsigned char arrlo[16];
        rtx to, temp, hi, lo;
        enum machine_mode imode = mode;
        /* We need to do reals as ints because the constant used in the
           IOR might not be a legitimate real constant. */
        imode = int_mode_for_mode (mode);
        constant_to_array (mode, ops[1], arrhi);
          to = simplify_gen_subreg (imode, ops[0], mode, 0);
        temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
        for (i = 0; i < 16; i += 4)
            arrlo[i + 2] = arrhi[i + 2];
            arrlo[i + 3] = arrhi[i + 3];
            arrlo[i + 0] = arrlo[i + 1] = 0;
            arrhi[i + 2] = arrhi[i + 3] = 0;
        hi = array_to_constant (imode, arrhi);
        lo = array_to_constant (imode, arrlo);
        emit_move_insn (temp, hi);
        emit_insn (gen_rtx_SET
                   (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));

        unsigned char arr_fsmbi[16];
        unsigned char arr_andbi[16];
        rtx to, reg_fsmbi, reg_and;
        enum machine_mode imode = mode;
        /* We need to do reals as ints because the constant used in the
           AND might not be a legitimate real constant. */
        imode = int_mode_for_mode (mode);
        constant_to_array (mode, ops[1], arr_fsmbi);
          to = simplify_gen_subreg (imode, ops[0], GET_MODE (ops[0]), 0);
        for (i = 0; i < 16; i++)
          if (arr_fsmbi[i] != 0)
              arr_andbi[0] = arr_fsmbi[i];
              arr_fsmbi[i] = 0xff;
        for (i = 1; i < 16; i++)
          arr_andbi[i] = arr_andbi[0];
        reg_fsmbi = array_to_constant (imode, arr_fsmbi);
        reg_and = array_to_constant (imode, arr_andbi);
        emit_move_insn (to, reg_fsmbi);
        emit_insn (gen_rtx_SET
                   (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));

      if (reload_in_progress || reload_completed)
          rtx mem = force_const_mem (mode, ops[1]);
          if (TARGET_LARGE_MEM)
              rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
              emit_move_insn (addr, XEXP (mem, 0));
              mem = replace_equiv_address (mem, addr);
          emit_move_insn (ops[0], mem);

      if (reload_completed && GET_CODE (ops[1]) != HIGH)
          emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
          emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
          emit_insn (gen_pic (ops[0], ops[1]));
              rtx pic_reg = get_pic_reg ();
              emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
              crtl->uses_pic_offset_table = 1;
      return flag_pic || c == IC_IL2s;
/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving registers we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue. */
need_to_save_reg (int regno, int saving)
  if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      && regno == PIC_OFFSET_TABLE_REGNUM
      && (!saving || crtl->uses_pic_offset_table)
          || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
/* This function is only correct starting with local register
   allocation.  */
spu_saved_regs_size (void)
  int reg_save_size = 0;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
    if (need_to_save_reg (regno, 0))
      reg_save_size += 0x10;
  return reg_save_size;

frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
  rtx reg = gen_rtx_REG (V4SImode, regno);
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (mem, reg));

frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
  rtx reg = gen_rtx_REG (V4SImode, regno);
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (reg, mem));

/* This happens after reload, so we need to expand it. */
frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
  if (satisfies_constraint_K (GEN_INT (imm)))
      insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
      emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
      insn = emit_insn (gen_addsi3 (dst, src, scratch));
      if (REGNO (src) == REGNO (scratch))

/* Return nonzero if this function is known to have a null epilogue. */
direct_return (void)
  if (reload_completed)
      if (cfun->static_chain_decl == 0
          && (spu_saved_regs_size ()
              + crtl->outgoing_args_size
              + crtl->args.pretend_args_size == 0)
          && current_function_is_leaf)
   The stack frame looks like this:

       AP -> +-------------+
   prev SP   | back chain  |
             |  reg save   |  crtl->args.pretend_args_size bytes
             | saved regs  |  spu_saved_regs_size() bytes
       FP -> +-------------+
             |    vars     |  get_frame_size() bytes
      HFP -> +-------------+
             |    args     |  crtl->outgoing_args_size bytes
       SP -> +-------------+
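   A worked example: a non-leaf function with 32 bytes of vars, no
   pretend args and 16 bytes of outgoing args allocates
   32 + spu_saved_regs_size () + 16 + STACK_POINTER_OFFSET bytes in
   spu_expand_prologue below; only a leaf function with an otherwise
   empty frame skips the STACK_POINTER_OFFSET term.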
spu_expand_prologue (void)
  HOST_WIDE_INT size = get_frame_size (), offset, regno;
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT saved_regs_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx scratch_reg_0, scratch_reg_1;

  /* A NOTE_INSN_DELETED is supposed to be at the start and end of
     the "toplevel" insn chain. */
  emit_note (NOTE_INSN_DELETED);

  if (flag_pic && optimize == 0)
    crtl->uses_pic_offset_table = 1;

  if (spu_naked_function_p (current_function_decl))

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
  scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

  if (!current_function_is_leaf
      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  /* Save this first because code after this might use the link
     register as a scratch register. */
  if (!current_function_is_leaf)
      insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
      RTX_FRAME_RELATED_P (insn) = 1;

      offset = -crtl->args.pretend_args_size;
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
        if (need_to_save_reg (regno, 1))
            insn = frame_emit_store (regno, sp_reg, offset);
            RTX_FRAME_RELATED_P (insn) = 1;

  if (flag_pic && crtl->uses_pic_offset_table)
      rtx pic_reg = get_pic_reg ();
      insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
      insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));

  if (flag_stack_check)
      /* We compare against total_size-1 because
         ($sp >= total_size) <=> ($sp > total_size-1) */
      rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
      rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
      rtx size_v4si = spu_const (V4SImode, total_size - 1);
      if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
          emit_move_insn (scratch_v4si, size_v4si);
          size_v4si = scratch_v4si;
      emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
      emit_insn (gen_vec_extractv4si
                 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
      emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
  /* Adjust the stack pointer, and make sure scratch_reg_0 contains
     the value of the previous $sp because we save it as the back
     chain. */
  if (total_size <= 2000)
      /* In this case we save the back chain first. */
      insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
        frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
      insn = emit_move_insn (scratch_reg_0, sp_reg);
        frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);

  RTX_FRAME_RELATED_P (insn) = 1;
  real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);

  if (total_size > 2000)
      /* Save the back chain ptr */
      insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);

  if (frame_pointer_needed)
      rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
      HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
        + crtl->outgoing_args_size;
      /* Set the new frame_pointer */
      insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
      RTX_FRAME_RELATED_P (insn) = 1;
      real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
      REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;

  emit_note (NOTE_INSN_DELETED);
spu_expand_epilogue (bool sibcall_p)
  int size = get_frame_size (), offset, regno;
  HOST_WIDE_INT saved_regs_size, total_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx jump, scratch_reg_0;

  /* A NOTE_INSN_DELETED is supposed to be at the start and end of
     the "toplevel" insn chain. */
  emit_note (NOTE_INSN_DELETED);

  if (spu_naked_function_p (current_function_decl))

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

  if (!current_function_is_leaf
      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

      if (cfun->calls_alloca)
        frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
        frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);

      if (saved_regs_size > 0)
          offset = -crtl->args.pretend_args_size;
          for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
            if (need_to_save_reg (regno, 1))
                frame_emit_load (regno, sp_reg, offset);

      if (!current_function_is_leaf)
        frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);

      emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
      jump = emit_jump_insn (gen__return ());
      emit_barrier_after (jump);

  emit_note (NOTE_INSN_DELETED);

spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
  /* This is inefficient because it ends up copying to a save-register
     which then gets saved even though $lr has already been saved.  But
     it does generate better code for leaf functions and we don't need
     to use RETURN_ADDRESS_POINTER_REGNUM to get it working.  It's only
     used for __builtin_return_address anyway, so maybe we don't care if
     it's inefficient. */
  return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
/* Given VAL, generate a constant appropriate for MODE.
   If MODE is a vector mode, every element will be VAL.
   For TImode, VAL will be zero extended to 128 bits. */
spu_const (enum machine_mode mode, HOST_WIDE_INT val)
  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
              || GET_MODE_CLASS (mode) == MODE_FLOAT
              || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
              || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);

  if (GET_MODE_CLASS (mode) == MODE_INT)
    return immed_double_const (val, 0, mode);

  /* val is the bit representation of the float */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    return hwint_to_const_double (mode, val);

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
    inner = hwint_to_const_double (GET_MODE_INNER (mode), val);

  units = GET_MODE_NUNITS (mode);

  v = rtvec_alloc (units);

  for (i = 0; i < units; ++i)
    RTVEC_ELT (v, i) = inner;

  return gen_rtx_CONST_VECTOR (mode, v);
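/* Usage examples for the helper above: spu_const (V4SImode, 1) builds
   the vector constant {1, 1, 1, 1}, while spu_const (SFmode,
   0x3f800000) builds the CONST_DOUBLE for 1.0f, since VAL is taken as
   the bit representation for float modes.  */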
/* Create a MODE vector constant from 4 ints. */
spu_const_from_ints (enum machine_mode mode, int a, int b, int c, int d)
  unsigned char arr[16];
  arr[0] = (a >> 24) & 0xff;
  arr[1] = (a >> 16) & 0xff;
  arr[2] = (a >> 8) & 0xff;
  arr[3] = (a >> 0) & 0xff;
  arr[4] = (b >> 24) & 0xff;
  arr[5] = (b >> 16) & 0xff;
  arr[6] = (b >> 8) & 0xff;
  arr[7] = (b >> 0) & 0xff;
  arr[8] = (c >> 24) & 0xff;
  arr[9] = (c >> 16) & 0xff;
  arr[10] = (c >> 8) & 0xff;
  arr[11] = (c >> 0) & 0xff;
  arr[12] = (d >> 24) & 0xff;
  arr[13] = (d >> 16) & 0xff;
  arr[14] = (d >> 8) & 0xff;
  arr[15] = (d >> 0) & 0xff;
  return array_to_constant (mode, arr);
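/* Example of the packing above: spu_const_from_ints (V4SImode,
   0x01020304, 0x05060708, 0x090a0b0c, 0x0d0e0f10) fills arr[] with
   the bytes 01 02 03 04 05 06 07 08 ... 0f 10, i.e. each int is
   stored most significant byte first, matching the SPU's big-endian
   register layout.  */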
/* branch hint stuff */

/* An array of these is used to propagate hints to predecessor blocks. */
  rtx prop_jump;                /* propagated from another block */
  int bb_index;                 /* the original block. */
static struct spu_bb_info *spu_bb_info;

#define STOP_HINT_P(INSN) \
  (GET_CODE(INSN) == CALL_INSN \
   || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
   || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)

/* 1 when RTX is a hinted branch or its target.  We keep track of
   what has been hinted so the safe-hint code can test it easily.  */
#define HINTED_P(RTX) \
  (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)

/* 1 when RTX is an insn that must be scheduled on an even boundary. */
#define SCHED_ON_EVEN_P(RTX) \
  (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
/* Emit a nop for INSN such that the two will dual issue.  This assumes
   INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
   We check for TImode to handle a MULTI1 insn which has dual issued its
   first instruction.  get_pipe returns -1 for MULTI0, inline asm, or
emit_nop_for_insn (rtx insn)
  p = get_pipe (insn);
  if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
    new_insn = emit_insn_after (gen_lnop (), insn);
  else if (p == 1 && GET_MODE (insn) == TImode)
      new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
      PUT_MODE (new_insn, TImode);
      PUT_MODE (insn, VOIDmode);
    new_insn = emit_insn_after (gen_lnop (), insn);
  recog_memoized (new_insn);
/* Insert nops in basic blocks to meet dual issue alignment
   requirements.  Also make sure hbrp and hint instructions are at least
   one cycle apart, possibly inserting a nop. */
  rtx insn, next_insn, prev_insn, hbr_insn = 0;

  /* This sets up INSN_ADDRESSES. */
  shorten_branches (get_insns ());

  /* Keep track of length added by nops. */

  insn = get_insns ();
  if (!active_insn_p (insn))
    insn = next_active_insn (insn);
  for (; insn; insn = next_insn)
      next_insn = next_active_insn (insn);
      if (INSN_CODE (insn) == CODE_FOR_iprefetch
          || INSN_CODE (insn) == CODE_FOR_hbr)
              int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
              int a1 = INSN_ADDRESSES (INSN_UID (insn));
              if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
                  prev_insn = emit_insn_before (gen_lnop (), insn);
                  PUT_MODE (prev_insn, GET_MODE (insn));
                  PUT_MODE (insn, TImode);
      if (INSN_CODE (insn) == CODE_FOR_blockage)
          if (GET_MODE (insn) == TImode)
            PUT_MODE (next_insn, TImode);
          next_insn = next_active_insn (insn);
      addr = INSN_ADDRESSES (INSN_UID (insn));
      if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
          if (((addr + length) & 7) != 0)
              emit_nop_for_insn (prev_insn);
      else if (GET_MODE (insn) == TImode
               && ((next_insn && GET_MODE (next_insn) != TImode)
                   || get_attr_type (insn) == TYPE_MULTI0)
               && ((addr + length) & 7) != 0)
          /* prev_insn will always be set because the first insn is
             always 8-byte aligned. */
          emit_nop_for_insn (prev_insn);
/* Routines for branch hints. */

spu_emit_branch_hint (rtx before, rtx branch, rtx target,
                      int distance, sbitmap blocks)
  rtx branch_label = 0;

  if (before == 0 || branch == 0 || target == 0)

  /* While scheduling we require hints to be no further than 600, so
     we need to enforce that here too */

  /* If we have a Basic block note, emit it after the basic block note. */
  if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
    before = NEXT_INSN (before);

  branch_label = gen_label_rtx ();
  LABEL_NUSES (branch_label)++;
  LABEL_PRESERVE_P (branch_label) = 1;
  insn = emit_label_before (branch_label, branch);
  branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
  SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);

  hint = emit_insn_before (gen_hbr (branch_label, target), before);
  recog_memoized (hint);
  HINTED_P (branch) = 1;

  if (GET_CODE (target) == LABEL_REF)
    HINTED_P (XEXP (target, 0)) = 1;
  else if (tablejump_p (branch, 0, &table))
      if (GET_CODE (PATTERN (table)) == ADDR_VEC)
        vec = XVEC (PATTERN (table), 0);
        vec = XVEC (PATTERN (table), 1);
      for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
        HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;

  if (distance >= 588)
      /* Make sure the hint isn't scheduled any earlier than this point,
         which could make it too far for the branch offset to fit */
      recog_memoized (emit_insn_before (gen_blockage (), hint));
  else if (distance <= 8 * 4)
      /* To guarantee at least 8 insns between the hint and branch we
         insert nops. */
      for (d = distance; d < 8 * 4; d += 4)
            emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
          recog_memoized (insn);
      /* Make sure any nops inserted aren't scheduled before the hint. */
      recog_memoized (emit_insn_after (gen_blockage (), hint));
      /* Make sure any nops inserted aren't scheduled after the call. */
      if (CALL_P (branch) && distance < 8 * 4)
        recog_memoized (emit_insn_before (gen_blockage (), branch));
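  /* E.g. if the hint could only be placed 24 bytes (6 insns) before
     the branch, the loop above runs for d = 24 and 28 and emits
     (8*4 - 24) / 4 = 2 nops after the hint, restoring the required
     8-insn separation.  */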
/* Returns 0 if we don't want a hint for this branch.  Otherwise return
   the rtx for the branch target. */
get_branch_target (rtx branch)
  if (GET_CODE (branch) == JUMP_INSN)
      /* Return statements */
      if (GET_CODE (PATTERN (branch)) == RETURN)
        return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);

      if (GET_CODE (PATTERN (branch)) == ADDR_VEC
          || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)

      set = single_set (branch);
      src = SET_SRC (set);
      if (GET_CODE (SET_DEST (set)) != PC)

      if (GET_CODE (src) == IF_THEN_ELSE)
          rtx note = find_reg_note (branch, REG_BR_PROB, 0);
              /* If the more probable case is not a fall through, then
                 try a branch hint. */
              HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
              if (prob > (REG_BR_PROB_BASE * 6 / 10)
                  && GET_CODE (XEXP (src, 1)) != PC)
                lab = XEXP (src, 1);
              else if (prob < (REG_BR_PROB_BASE * 4 / 10)
                       && GET_CODE (XEXP (src, 2)) != PC)
                lab = XEXP (src, 2);
              if (GET_CODE (lab) == RETURN)
                return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);

  else if (GET_CODE (branch) == CALL_INSN)
      /* All of our call patterns are in a PARALLEL and the CALL is
         the first pattern in the PARALLEL. */
      if (GET_CODE (PATTERN (branch)) != PARALLEL)
      call = XVECEXP (PATTERN (branch), 0, 0);
      if (GET_CODE (call) == SET)
        call = SET_SRC (call);
      if (GET_CODE (call) != CALL)
      return XEXP (XEXP (call, 0), 0);
/* The special $hbr register is used to prevent the insn scheduler from
   moving hbr insns across instructions which invalidate them.  It
   should only be used in a clobber, and this function searches for
   insns which clobber it.  */
insn_clobbers_hbr (rtx insn)
      && GET_CODE (PATTERN (insn)) == PARALLEL)
      rtx parallel = PATTERN (insn);
      for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
          clobber = XVECEXP (parallel, 0, j);
          if (GET_CODE (clobber) == CLOBBER
              && GET_CODE (XEXP (clobber, 0)) == REG
              && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
/* Search up to 32 insns starting at FIRST:
   - at any kind of hinted branch, just return
   - at any unconditional branch in the first 15 insns, just return
   - at a call or indirect branch, after the first 15 insns, force it to
     an even address and return
   - at any unconditional branch, after the first 15 insns, force it to
     an even address and return
   At the end of the search, insert an hbrp within 4 insns of FIRST,
   and an hbrp within 16 instructions of FIRST.
 */
insert_hbrp_for_ilb_runout (rtx first)
  rtx insn, before_4 = 0, before_16 = 0;
  int addr = 0, length, first_addr = -1;
  int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
  int insert_lnop_after = 0;
  for (insn = first; insn; insn = NEXT_INSN (insn))
        if (first_addr == -1)
          first_addr = INSN_ADDRESSES (INSN_UID (insn));
        addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
        length = get_attr_length (insn);

        if (before_4 == 0 && addr + length >= 4 * 4)
        /* We test for 14 instructions because the first hbrp will add
           up to 2 instructions. */
        if (before_16 == 0 && addr + length >= 14 * 4)

        if (INSN_CODE (insn) == CODE_FOR_hbr)
            /* Make sure an hbrp is at least 2 cycles away from a hint.
               Insert an lnop after the hbrp when necessary. */
            if (before_4 == 0 && addr > 0)
                insert_lnop_after |= 1;
            else if (before_4 && addr <= 4 * 4)
              insert_lnop_after |= 1;
            if (before_16 == 0 && addr > 10 * 4)
                insert_lnop_after |= 2;
            else if (before_16 && addr <= 14 * 4)
              insert_lnop_after |= 2;

        if (INSN_CODE (insn) == CODE_FOR_iprefetch)
            if (addr < hbrp_addr0)
            else if (addr < hbrp_addr1)

        if (CALL_P (insn) || JUMP_P (insn))
            if (HINTED_P (insn))

            /* Any branch after the first 15 insns should be on an even
               address to avoid a special case branch.  There might be
               some nops and/or hbrps inserted, so we test after 10
              SCHED_ON_EVEN_P (insn) = 1;

        if (CALL_P (insn) || tablejump_p (insn, 0, 0))

        if (addr + length >= 32 * 4)
            gcc_assert (before_4 && before_16);
            if (hbrp_addr0 > 4 * 4)
                  emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
                recog_memoized (insn);
                INSN_ADDRESSES_NEW (insn,
                                    INSN_ADDRESSES (INSN_UID (before_4)));
                PUT_MODE (insn, GET_MODE (before_4));
                PUT_MODE (before_4, TImode);
                if (insert_lnop_after & 1)
                    insn = emit_insn_before (gen_lnop (), before_4);
                    recog_memoized (insn);
                    INSN_ADDRESSES_NEW (insn,
                                        INSN_ADDRESSES (INSN_UID (before_4)));
                    PUT_MODE (insn, TImode);
            if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
                && hbrp_addr1 > 16 * 4)
                  emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
                recog_memoized (insn);
                INSN_ADDRESSES_NEW (insn,
                                    INSN_ADDRESSES (INSN_UID (before_16)));
                PUT_MODE (insn, GET_MODE (before_16));
                PUT_MODE (before_16, TImode);
                if (insert_lnop_after & 2)
                    insn = emit_insn_before (gen_lnop (), before_16);
                    recog_memoized (insn);
                    INSN_ADDRESSES_NEW (insn,
                                        INSN_ADDRESSES (INSN_UID
                    PUT_MODE (insn, TImode);

      else if (BARRIER_P (insn))
2527 /* The SPU might hang when it executes 48 inline instructions after a
2528 hinted branch jumps to its hinted target. The beginning of a
2529 function and the return from a call might have been hinted, and must
2530 be handled as well. To prevent a hang we insert 2 hbrps. The first
2531 should be within 6 insns of the branch target. The second should be
2532 within 22 insns of the branch target. When determining if hbrps are
2533 necessary, we look for only 32 inline instructions, because up to
2534 12 nops and 4 hbrps could be inserted.  Similarly, when inserting
2535 new hbrps, we insert them within 4 and 16 insns of the target. */
2540 if (TARGET_SAFE_HINTS)
2542 shorten_branches (get_insns ());
2543 /* Insert hbrp at beginning of function */
2544 insn = next_active_insn (get_insns ());
2546 insert_hbrp_for_ilb_runout (insn);
2547 /* Insert hbrp after hinted targets. */
2548 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2549 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2550 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2554 static int in_spu_reorg;
2556 /* Insert branch hints. There are no branch optimizations after this
2557 pass, so it's safe to set our branch hints now. */
2559 spu_machine_dependent_reorg (void)
2564 rtx branch_target = 0;
2565 int branch_addr = 0, insn_addr, required_dist = 0;
2569 if (!TARGET_BRANCH_HINTS || optimize == 0)
2571 /* We still do it for unoptimized code because an external
2572 function might have hinted a call or return. */
2578 blocks = sbitmap_alloc (last_basic_block);
2579 sbitmap_zero (blocks);
2582 compute_bb_for_insn ();
2587 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2588 sizeof (struct spu_bb_info));
2590 /* We need exact insn addresses and lengths. */
2591 shorten_branches (get_insns ());
2593 for (i = n_basic_blocks - 1; i >= 0; i--)
2595 bb = BASIC_BLOCK (i);
2597 if (spu_bb_info[i].prop_jump)
2599 branch = spu_bb_info[i].prop_jump;
2600 branch_target = get_branch_target (branch);
2601 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2602 required_dist = spu_hint_dist;
2604 /* Search from end of a block to beginning.  In this loop, find
2605 jumps which need a branch hint and emit the hint only when:
2606 - it's an indirect branch and we're at the insn which sets
2608 - we're at an insn that will invalidate the hint. e.g., a
2609 call, another hint insn, inline asm that clobbers $hbr, and
2610 some inlined operations (divmodsi4). Don't consider jumps
2611 because they are only at the end of a block and are
2612 considered when we are deciding whether to propagate
2613 - we're getting too far away from the branch. The hbr insns
2614 only have a signed 10 bit offset
2615 We go back as far as possible so the branch will be considered
2616 for propagation when we get to the beginning of the block. */
2617 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2621 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2623 && ((GET_CODE (branch_target) == REG
2624 && set_of (branch_target, insn) != NULL_RTX)
2625 || insn_clobbers_hbr (insn)
2626 || branch_addr - insn_addr > 600))
2628 rtx next = NEXT_INSN (insn);
2629 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2630 if (insn != BB_END (bb)
2631 && branch_addr - next_addr >= required_dist)
2635 "hint for %i in block %i before %i\n",
2636 INSN_UID (branch), bb->index,
2638 spu_emit_branch_hint (next, branch, branch_target,
2639 branch_addr - next_addr, blocks);
2644 /* JUMP_P will only be true at the end of a block. When
2645 branch is already set it means we've previously decided
2646 to propagate a hint for that branch into this block. */
2647 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2650 if ((branch_target = get_branch_target (insn)))
2653 branch_addr = insn_addr;
2654 required_dist = spu_hint_dist;
2658 if (insn == BB_HEAD (bb))
2664 /* If we haven't emitted a hint for this branch yet, it might
2665 be profitable to emit it in one of the predecessor blocks,
2666 especially for loops. */
2668 basic_block prev = 0, prop = 0, prev2 = 0;
2669 int loop_exit = 0, simple_loop = 0;
2670 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2672 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2673 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2674 prev = EDGE_PRED (bb, j)->src;
2676 prev2 = EDGE_PRED (bb, j)->src;
2678 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2679 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2681 else if (EDGE_SUCC (bb, j)->dest == bb)
2684 /* If this branch is a loop exit then propagate to previous
2685 fallthru block. This catches the cases when it is a simple
2686 loop or when there is an initial branch into the loop. */
2687 if (prev && (loop_exit || simple_loop)
2688 && prev->loop_depth <= bb->loop_depth)
2691 /* If there is only one adjacent predecessor, don't propagate
2692 outside this loop.  This loop_depth test isn't perfect, but
2693 I'm not sure the loop_father member is valid at this point. */
2694 else if (prev && single_pred_p (bb)
2695 && prev->loop_depth == bb->loop_depth)
2698 /* If this is the JOIN block of a simple IF-THEN then
2699 propagate the hint to the HEADER block. */
2700 else if (prev && prev2
2701 && EDGE_COUNT (bb->preds) == 2
2702 && EDGE_COUNT (prev->preds) == 1
2703 && EDGE_PRED (prev, 0)->src == prev2
2704 && prev2->loop_depth == bb->loop_depth
2705 && GET_CODE (branch_target) != REG)
2708 /* Don't propagate when:
2709 - this is a simple loop and the hint would be too far
2710 - this is not a simple loop and there are 16 insns in
2712 - the predecessor block ends in a branch that will be
2714 - the predecessor block ends in an insn that invalidates
2718 && (bbend = BB_END (prop))
2719 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2720 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2721 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2724 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2725 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2726 bb->index, prop->index, bb->loop_depth,
2727 INSN_UID (branch), loop_exit, simple_loop,
2728 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2730 spu_bb_info[prop->index].prop_jump = branch;
2731 spu_bb_info[prop->index].bb_index = i;
2733 else if (branch_addr - next_addr >= required_dist)
2736 fprintf (dump_file, "hint for %i in block %i before %i\n",
2737 INSN_UID (branch), bb->index,
2738 INSN_UID (NEXT_INSN (insn)));
2739 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2740 branch_addr - next_addr, blocks);
2747 if (!sbitmap_empty_p (blocks))
2748 find_many_sub_basic_blocks (blocks);
2750 /* We have to schedule to make sure alignment is ok. */
2751 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2753 /* The hints need to be scheduled, so call it again. */
2761 if (spu_flag_var_tracking)
2764 timevar_push (TV_VAR_TRACKING);
2765 variable_tracking_main ();
2766 timevar_pop (TV_VAR_TRACKING);
2767 df_finish_pass (false);
2770 free_bb_for_insn ();
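/* The 600-byte cutoff used above is conservative: the hbr offset is a
   signed 10-bit field (we assume it counts 4-byte words), so a hint
   reaches only about +/- 2 KB, and insns inserted later must not push
   it out of range.  A trivial standalone sketch (not part of the
   build):  */
#if 0
#include <stdio.h>

int
main (void)
{
  int offset_bits = 10;				/* signed hbr offset width */
  int reach = (1 << (offset_bits - 1)) * 4;	/* words -> bytes */
  printf ("hbr reach: +/- %d bytes\n", reach);	/* 2048 */
  return 0;
}
#endif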
2776 /* Insn scheduling routines, primarily for dual issue. */
2778 spu_sched_issue_rate (void)
2784 uses_ls_unit (rtx insn)
2786 rtx set = single_set (insn);
2788 && (GET_CODE (SET_DEST (set)) == MEM
2789 || GET_CODE (SET_SRC (set)) == MEM))
2798 /* Handle inline asm */
2799 if (INSN_CODE (insn) == -1)
2801 t = get_attr_type (insn);
2826 case TYPE_IPREFETCH:
2834 /* haifa-sched.c has a static variable that keeps track of the current
2835 cycle. It is passed to spu_sched_reorder, and we record it here for
2836 use by spu_sched_variable_issue. It won't be accurate if the
2837 scheduler updates its clock_var between the two calls. */
2838 static int clock_var;
2840 /* This is used to keep track of insn alignment. Set to 0 at the
2841 beginning of each block and increased by the "length" attr of each
2843 static int spu_sched_length;
2845 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2846 ready list appropriately in spu_sched_reorder(). */
2847 static int pipe0_clock;
2848 static int pipe1_clock;
2850 static int prev_clock_var;
2852 static int prev_priority;
2854 /* The SPU needs to load the next ilb sometime during the execution of
2855 the previous ilb. There is a potential conflict if every cycle has a
2856 load or store. To avoid the conflict we make sure the load/store
2857 unit is free for at least one cycle during the execution of insns in
2858 the previous ilb. */
2859 static int spu_ls_first;
2860 static int prev_ls_clock;
2863 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2864 int max_ready ATTRIBUTE_UNUSED)
2866 spu_sched_length = 0;
2870 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2871 int max_ready ATTRIBUTE_UNUSED)
2873 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2875 /* When any block might be at least 8-byte aligned, assume they
2876 will all be at least 8-byte aligned to make sure dual issue
2877 works out correctly. */
2878 spu_sched_length = 0;
2880 spu_ls_first = INT_MAX;
2885 prev_clock_var = -1;
2890 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2891 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
2895 if (GET_CODE (PATTERN (insn)) == USE
2896 || GET_CODE (PATTERN (insn)) == CLOBBER
2897 || (len = get_attr_length (insn)) == 0)
2900 spu_sched_length += len;
2902 /* Reset on inline asm */
2903 if (INSN_CODE (insn) == -1)
2905 spu_ls_first = INT_MAX;
2910 p = get_pipe (insn);
2912 pipe0_clock = clock_var;
2914 pipe1_clock = clock_var;
2918 if (clock_var - prev_ls_clock > 1
2919 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2920 spu_ls_first = INT_MAX;
2921 if (uses_ls_unit (insn))
2923 if (spu_ls_first == INT_MAX)
2924 spu_ls_first = spu_sched_length;
2925 prev_ls_clock = clock_var;
2928 /* The scheduler hasn't inserted the nop, but we will later on.
2929 Include those nops in spu_sched_length. */
2930 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2931 spu_sched_length += 4;
2932 prev_clock_var = clock_var;
2934 /* more is -1 when called from spu_sched_reorder for new insns
2935 that don't have INSN_PRIORITY */
2937 prev_priority = INSN_PRIORITY (insn);
2940 /* Always try issuing more insns.  spu_sched_reorder will decide
2941 when the cycle should be advanced. */
2945 /* This function is called for both TARGET_SCHED_REORDER and
2946 TARGET_SCHED_REORDER2. */
2948 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2949 rtx *ready, int *nreadyp, int clock)
2951 int i, nready = *nreadyp;
2952 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2957 if (nready <= 0 || pipe1_clock >= clock)
2960 /* Find any rtl insns that don't generate assembly insns and schedule
2962 for (i = nready - 1; i >= 0; i--)
2965 if (INSN_CODE (insn) == -1
2966 || INSN_CODE (insn) == CODE_FOR_blockage
2967 || INSN_CODE (insn) == CODE_FOR__spu_convert)
2969 ready[i] = ready[nready - 1];
2970 ready[nready - 1] = insn;
2975 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2976 for (i = 0; i < nready; i++)
2977 if (INSN_CODE (ready[i]) != -1)
2980 switch (get_attr_type (insn))
3005 case TYPE_IPREFETCH:
3011 /* In the first scheduling phase, schedule loads and stores together
3012 to increase the chance they will get merged during postreload CSE. */
3013 if (!reload_completed && pipe_ls >= 0)
3015 insn = ready[pipe_ls];
3016 ready[pipe_ls] = ready[nready - 1];
3017 ready[nready - 1] = insn;
3021 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3025 /* When we have loads/stores in every cycle of the last 15 insns and
3026 we are about to schedule another load/store, emit an hbrp insn
3029 && spu_sched_length - spu_ls_first >= 4 * 15
3030 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3032 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3033 recog_memoized (insn);
3034 if (pipe0_clock < clock)
3035 PUT_MODE (insn, TImode);
3036 spu_sched_variable_issue (file, verbose, insn, -1);
3040 /* In general, we want to emit nops to increase dual issue, but dual
3041 issue isn't faster when one of the insns could be scheduled later
3042 without affecting the critical path.  We look at INSN_PRIORITY to
3043 make a good guess, but it isn't perfect, so -mdual-nops=n can be
3044 used to adjust it. */
3045 if (in_spu_reorg && spu_dual_nops < 10)
3047 /* When we are at an even address and we are not issuing nops to
3048 improve scheduling then we need to advance the cycle. */
3049 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3050 && (spu_dual_nops == 0
3053 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3056 /* When at an odd address, schedule the highest priority insn
3057 without considering pipeline. */
3058 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3059 && (spu_dual_nops == 0
3061 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3066 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3067 pipe0 insn in the ready list, schedule it. */
3068 if (pipe0_clock < clock && pipe_0 >= 0)
3069 schedule_i = pipe_0;
3071 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3072 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3074 schedule_i = pipe_1;
3076 if (schedule_i > -1)
3078 insn = ready[schedule_i];
3079 ready[schedule_i] = ready[nready - 1];
3080 ready[nready - 1] = insn;
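/* The even/odd address tests above reflect the SPU fetch rules: a
   pipe-0 insn at an 8-byte-aligned address can dual-issue with the
   following pipe-1 insn.  A trivial standalone sketch (not part of
   the build):  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned addr;
  for (addr = 0; addr < 16; addr += 4)
    printf ("insn at %2u: %s slot\n", addr,
	    (addr & 7) == 0 ? "even (pipe 0)" : "odd (pipe 1)");
  return 0;
}
#endif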
3086 /* INSN is dependent on DEP_INSN. */
3088 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3092 /* The blockage pattern is used to prevent instructions from being
3093 moved across it and has no cost. */
3094 if (INSN_CODE (insn) == CODE_FOR_blockage
3095 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3098 if (INSN_CODE (insn) == CODE_FOR__spu_convert
3099 || INSN_CODE (dep_insn) == CODE_FOR__spu_convert)
3102 /* Make sure hbrps are spread out. */
3103 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3104 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3107 /* Make sure hints and hbrps are 2 cycles apart. */
3108 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3109 || INSN_CODE (insn) == CODE_FOR_hbr)
3110 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3111 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3114 /* An hbrp has no real dependency on other insns. */
3115 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3116 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3119 /* Assuming that it is unlikely an argument register will be used in
3120 the first cycle of the called function, we reduce the cost for
3121 slightly better scheduling of dep_insn. When not hinted, the
3122 mispredicted branch would hide the cost as well. */
3125 rtx target = get_branch_target (insn);
3126 if (GET_CODE (target) != REG || !set_of (target, insn))
3131 /* And when returning from a function, let's assume the return values
3132 are completed sooner too. */
3133 if (CALL_P (dep_insn))
3136 /* Make sure an instruction that loads from the back chain is scheduled
3137 away from the return instruction so a hint is more likely to get
3139 if (INSN_CODE (insn) == CODE_FOR__return
3140 && (set = single_set (dep_insn))
3141 && GET_CODE (SET_DEST (set)) == REG
3142 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3145 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3146 scheduler makes every insn in a block anti-dependent on the final
3147 jump_insn. We adjust here so higher cost insns will get scheduled
3149 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3150 return insn_cost (dep_insn) - 3;
3155 /* Create a CONST_DOUBLE from a string. */
3157 spu_float_const (const char *string, enum machine_mode mode)
3159 REAL_VALUE_TYPE value;
3160 value = REAL_VALUE_ATOF (string, mode);
3161 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3165 spu_constant_address_p (rtx x)
3167 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3168 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3169 || GET_CODE (x) == HIGH);
3172 static enum spu_immediate
3173 which_immediate_load (HOST_WIDE_INT val)
3175 gcc_assert (val == trunc_int_for_mode (val, SImode));
3177 if (val >= -0x8000 && val <= 0x7fff)
3179 if (val >= 0 && val <= 0x3ffff)
3181 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3183 if ((val & 0xffff) == 0)
3189 /* Return true when OP can be loaded by one of the il instructions, or
3190 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3192 immediate_load_p (rtx op, enum machine_mode mode)
3194 if (CONSTANT_P (op))
3196 enum immediate_class c = classify_immediate (op, mode);
3197 return c == IC_IL1 || c == IC_IL1s
3198 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
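/* As a worked illustration of the classification above, here is a
   standalone sketch (not part of the build; our reading of
   which_immediate_load) applying the same range checks to a plain
   32-bit value.  */
#if 0
#include <stdio.h>

static const char *
classify_si_load (long long val)
{
  if (val >= -0x8000 && val <= 0x7fff)
    return "il";			/* 16-bit signed immediate */
  if (val >= 0 && val <= 0x3ffff)
    return "ila";			/* 18-bit unsigned immediate */
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    return "ilh";			/* identical halfwords */
  if ((val & 0xffff) == 0)
    return "ilhu";			/* only the high halfword is set */
  return "ilhu+iohl";			/* needs a two-insn sequence */
}

int
main (void)
{
  printf ("%s\n", classify_si_load (42));	   /* il */
  printf ("%s\n", classify_si_load (0x12340000)); /* ilhu */
  printf ("%s\n", classify_si_load (0x12345678)); /* ilhu+iohl */
  return 0;
}
#endif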
3203 /* Return true if the first SIZE bytes of ARR form a constant that can be
3204 generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and PSTART
3205 represent the size and offset of the instruction to use. */
3207 cpat_info (unsigned char *arr, int size, int *prun, int *pstart)
3209 int cpat, run, i, start;
3213 for (i = 0; i < size && cpat; i++)
3221 else if (arr[i] == 2 && arr[i+1] == 3)
3223 else if (arr[i] == 0)
3225 while (arr[i+run] == run && i+run < 16)
3227 if (run != 4 && run != 8)
3232 if ((i & (run-1)) != 0)
3239 if (cpat && (run || size < 16))
3246 *pstart = start == -1 ? 16-run : start;
3252 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3253 it into a register. MODE is only valid when OP is a CONST_INT. */
3254 static enum immediate_class
3255 classify_immediate (rtx op, enum machine_mode mode)
3258 unsigned char arr[16];
3259 int i, j, repeated, fsmbi, repeat;
3261 gcc_assert (CONSTANT_P (op));
3263 if (GET_MODE (op) != VOIDmode)
3264 mode = GET_MODE (op);
3266 /* A V4SI const_vector with all identical symbols is ok. */
3269 && GET_CODE (op) == CONST_VECTOR
3270 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3271 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3272 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3273 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3274 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3275 op = CONST_VECTOR_ELT (op, 0);
3277 switch (GET_CODE (op))
3281 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3284 /* We can never know if the resulting address fits in 18 bits and can be
3285 loaded with ila. For now, assume the address will not overflow if
3286 the displacement is "small" (fits 'K' constraint). */
3287 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3289 rtx sym = XEXP (XEXP (op, 0), 0);
3290 rtx cst = XEXP (XEXP (op, 0), 1);
3292 if (GET_CODE (sym) == SYMBOL_REF
3293 && GET_CODE (cst) == CONST_INT
3294 && satisfies_constraint_K (cst))
3303 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3304 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3305 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3311 constant_to_array (mode, op, arr);
3313 /* Check that each 4-byte slot is identical. */
3315 for (i = 4; i < 16; i += 4)
3316 for (j = 0; j < 4; j++)
3317 if (arr[j] != arr[i + j])
3322 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3323 val = trunc_int_for_mode (val, SImode);
3325 if (which_immediate_load (val) != SPU_NONE)
3329 /* Any mode of 2 bytes or smaller can be loaded with an il
3331 gcc_assert (GET_MODE_SIZE (mode) > 2);
3335 for (i = 0; i < 16 && fsmbi; i++)
3336 if (arr[i] != 0 && repeat == 0)
3338 else if (arr[i] != 0 && arr[i] != repeat)
3341 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3343 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3356 static enum spu_immediate
3357 which_logical_immediate (HOST_WIDE_INT val)
3359 gcc_assert (val == trunc_int_for_mode (val, SImode));
3361 if (val >= -0x200 && val <= 0x1ff)
3363 if (val >= 0 && val <= 0xffff)
3365 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3367 val = trunc_int_for_mode (val, HImode);
3368 if (val >= -0x200 && val <= 0x1ff)
3370 if ((val & 0xff) == ((val >> 8) & 0xff))
3372 val = trunc_int_for_mode (val, QImode);
3373 if (val >= -0x200 && val <= 0x1ff)
3380 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3383 const_vector_immediate_p (rtx x)
3386 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3387 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3388 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3389 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3395 logical_immediate_p (rtx op, enum machine_mode mode)
3398 unsigned char arr[16];
3401 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3402 || GET_CODE (op) == CONST_VECTOR);
3404 if (GET_CODE (op) == CONST_VECTOR
3405 && !const_vector_immediate_p (op))
3408 if (GET_MODE (op) != VOIDmode)
3409 mode = GET_MODE (op);
3411 constant_to_array (mode, op, arr);
3413 /* Check that bytes are repeated. */
3414 for (i = 4; i < 16; i += 4)
3415 for (j = 0; j < 4; j++)
3416 if (arr[j] != arr[i + j])
3419 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3420 val = trunc_int_for_mode (val, SImode);
3422 i = which_logical_immediate (val);
3423 return i != SPU_NONE && i != SPU_IOHL;
3427 iohl_immediate_p (rtx op, enum machine_mode mode)
3430 unsigned char arr[16];
3433 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3434 || GET_CODE (op) == CONST_VECTOR);
3436 if (GET_CODE (op) == CONST_VECTOR
3437 && !const_vector_immediate_p (op))
3440 if (GET_MODE (op) != VOIDmode)
3441 mode = GET_MODE (op);
3443 constant_to_array (mode, op, arr);
3445 /* Check that bytes are repeated. */
3446 for (i = 4; i < 16; i += 4)
3447 for (j = 0; j < 4; j++)
3448 if (arr[j] != arr[i + j])
3451 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3452 val = trunc_int_for_mode (val, SImode);
3454 return val >= 0 && val <= 0xffff;
3458 arith_immediate_p (rtx op, enum machine_mode mode,
3459 HOST_WIDE_INT low, HOST_WIDE_INT high)
3462 unsigned char arr[16];
3465 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3466 || GET_CODE (op) == CONST_VECTOR);
3468 if (GET_CODE (op) == CONST_VECTOR
3469 && !const_vector_immediate_p (op))
3472 if (GET_MODE (op) != VOIDmode)
3473 mode = GET_MODE (op);
3475 constant_to_array (mode, op, arr);
3477 if (VECTOR_MODE_P (mode))
3478 mode = GET_MODE_INNER (mode);
3480 bytes = GET_MODE_SIZE (mode);
3481 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3483 /* Check that bytes are repeated. */
3484 for (i = bytes; i < 16; i += bytes)
3485 for (j = 0; j < bytes; j++)
3486 if (arr[j] != arr[i + j])
3490 for (j = 1; j < bytes; j++)
3491 val = (val << 8) | arr[j];
3493 val = trunc_int_for_mode (val, mode);
3495 return val >= low && val <= high;
3499 - any 32-bit constant (SImode, SFmode)
3500 - any constant that can be generated with fsmbi (any mode)
3501 - a 64-bit constant where the high and low bits are identical
3503 - a 128-bit constant where the four 32-bit words match. */
3505 spu_legitimate_constant_p (rtx x)
3507 if (GET_CODE (x) == HIGH)
3509 /* V4SI with all identical symbols is valid. */
3511 && GET_MODE (x) == V4SImode
3512 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3513 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3514 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3515 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3516 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3517 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3519 if (GET_CODE (x) == CONST_VECTOR
3520 && !const_vector_immediate_p (x))
3525 /* Valid addresses are:
3526 - symbol_ref, label_ref, const
3528 - reg + const, where either reg or const is 16 byte aligned
3529 - reg + reg, alignment doesn't matter
3530 The alignment matters in the reg+const case because lqd and stqd
3531 ignore the 4 least significant bits of the const. (TODO: It might be
3532 preferable to allow any alignment and fix it up when splitting.) */
3534 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
3535 rtx x, int reg_ok_strict)
3537 if (mode == TImode && GET_CODE (x) == AND
3538 && GET_CODE (XEXP (x, 1)) == CONST_INT
3539 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
3541 switch (GET_CODE (x))
3545 return !TARGET_LARGE_MEM;
3548 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
3550 rtx sym = XEXP (XEXP (x, 0), 0);
3551 rtx cst = XEXP (XEXP (x, 0), 1);
3553 /* Accept any symbol_ref + constant, assuming it does not
3554 wrap around the local store addressability limit. */
3555 if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
3561 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3565 gcc_assert (GET_CODE (x) == REG);
3568 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3573 rtx op0 = XEXP (x, 0);
3574 rtx op1 = XEXP (x, 1);
3575 if (GET_CODE (op0) == SUBREG)
3576 op0 = XEXP (op0, 0);
3577 if (GET_CODE (op1) == SUBREG)
3578 op1 = XEXP (op1, 0);
3579 /* We can't just accept any aligned register because CSE can
3580 change it to a register that is not marked aligned and then
3581 recog will fail. So we only accept frame registers because
3582 they will only be changed to other frame registers. */
3583 if (GET_CODE (op0) == REG
3584 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3585 && GET_CODE (op1) == CONST_INT
3586 && INTVAL (op1) >= -0x2000
3587 && INTVAL (op1) <= 0x1fff
3588 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
3590 if (GET_CODE (op0) == REG
3591 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3592 && GET_CODE (op1) == REG
3593 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
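/* The displacement rule above exists because lqd/stqd ignore the 4
   least significant bits of the offset.  A standalone sketch (not
   part of the build) of the masking a misaligned displacement is
   subject to:  */
#if 0
#include <stdio.h>

int
main (void)
{
  int disp[] = { 32, 34, 48 };
  unsigned i;
  for (i = 0; i < sizeof disp / sizeof *disp; i++)
    printf ("disp %2d -> lqd/stqd would access %2d%s\n",
	    disp[i], disp[i] & -16,
	    (disp[i] & 15) ? "  (low bits ignored)" : "");
  return 0;
}
#endif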
3604 /* When the address is reg + const_int, force the const_int into a
3607 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3608 enum machine_mode mode)
3611 /* Make sure both operands are registers. */
3612 if (GET_CODE (x) == PLUS)
3616 if (ALIGNED_SYMBOL_REF_P (op0))
3618 op0 = force_reg (Pmode, op0);
3619 mark_reg_pointer (op0, 128);
3621 else if (GET_CODE (op0) != REG)
3622 op0 = force_reg (Pmode, op0);
3623 if (ALIGNED_SYMBOL_REF_P (op1))
3625 op1 = force_reg (Pmode, op1);
3626 mark_reg_pointer (op1, 128);
3628 else if (GET_CODE (op1) != REG)
3629 op1 = force_reg (Pmode, op1);
3630 x = gen_rtx_PLUS (Pmode, op0, op1);
3631 if (spu_legitimate_address (mode, x, 0))
3637 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3638 struct attribute_spec.handler. */
3640 spu_handle_fndecl_attribute (tree * node,
3642 tree args ATTRIBUTE_UNUSED,
3643 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3645 if (TREE_CODE (*node) != FUNCTION_DECL)
3647 warning (0, "`%s' attribute only applies to functions",
3648 IDENTIFIER_POINTER (name));
3649 *no_add_attrs = true;
3655 /* Handle the "vector" attribute. */
3657 spu_handle_vector_attribute (tree * node, tree name,
3658 tree args ATTRIBUTE_UNUSED,
3659 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3661 tree type = *node, result = NULL_TREE;
3662 enum machine_mode mode;
3665 while (POINTER_TYPE_P (type)
3666 || TREE_CODE (type) == FUNCTION_TYPE
3667 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3668 type = TREE_TYPE (type);
3670 mode = TYPE_MODE (type);
3672 unsigned_p = TYPE_UNSIGNED (type);
3676 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3679 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3682 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3685 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3688 result = V4SF_type_node;
3691 result = V2DF_type_node;
3697 /* Propagate qualifiers attached to the element type
3698 onto the vector type. */
3699 if (result && result != type && TYPE_QUALS (type))
3700 result = build_qualified_type (result, TYPE_QUALS (type));
3702 *no_add_attrs = true; /* No need to hang on to the attribute. */
3705 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
3707 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3712 /* Return nonzero if FUNC is a naked function. */
3714 spu_naked_function_p (tree func)
3718 if (TREE_CODE (func) != FUNCTION_DECL)
3721 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3722 return a != NULL_TREE;
3726 spu_initial_elimination_offset (int from, int to)
3728 int saved_regs_size = spu_saved_regs_size ();
3730 if (!current_function_is_leaf || crtl->outgoing_args_size
3731 || get_frame_size () || saved_regs_size)
3732 sp_offset = STACK_POINTER_OFFSET;
3733 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3734 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3735 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3736 return get_frame_size ();
3737 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3738 return sp_offset + crtl->outgoing_args_size
3739 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3740 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3741 return get_frame_size () + saved_regs_size + sp_offset;
3747 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3749 enum machine_mode mode = TYPE_MODE (type);
3750 int byte_size = ((mode == BLKmode)
3751 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3753 /* Make sure small structs are left justified in a register. */
3754 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3755 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3757 enum machine_mode smode;
3760 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3761 int n = byte_size / UNITS_PER_WORD;
3762 v = rtvec_alloc (nregs);
3763 for (i = 0; i < n; i++)
3765 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3766 gen_rtx_REG (TImode,
3769 GEN_INT (UNITS_PER_WORD * i));
3770 byte_size -= UNITS_PER_WORD;
3778 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3780 gen_rtx_EXPR_LIST (VOIDmode,
3781 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3782 GEN_INT (UNITS_PER_WORD * n));
3784 return gen_rtx_PARALLEL (mode, v);
3786 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3790 spu_function_arg (CUMULATIVE_ARGS cum,
3791 enum machine_mode mode,
3792 tree type, int named ATTRIBUTE_UNUSED)
3796 if (cum >= MAX_REGISTER_ARGS)
3799 byte_size = ((mode == BLKmode)
3800 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3802 /* The ABI does not allow parameters to be passed partly in
3803 registers and partly on the stack. */
3804 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3807 /* Make sure small structs are left justified in a register. */
3808 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3809 && byte_size < UNITS_PER_WORD && byte_size > 0)
3811 enum machine_mode smode;
3815 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3816 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3817 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3819 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3822 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3825 /* Variable sized types are passed by reference. */
3827 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3828 enum machine_mode mode ATTRIBUTE_UNUSED,
3829 const_tree type, bool named ATTRIBUTE_UNUSED)
3831 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3837 /* Create and return the va_list datatype.
3839 On SPU, va_list is an array type equivalent to
3841 typedef struct __va_list_tag
3843 void *__args __attribute__((__aligned(16)));
3844 void *__skip __attribute__((__aligned(16)));
3848 where __args points to the arg that will be returned by the next
3849 va_arg(), and __skip points to the previous stack frame such that
3850 when __args == __skip we should advance __args by 32 bytes. */
3852 spu_build_builtin_va_list (void)
3854 tree f_args, f_skip, record, type_decl;
3857 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3860 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3862 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3863 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3865 DECL_FIELD_CONTEXT (f_args) = record;
3866 DECL_ALIGN (f_args) = 128;
3867 DECL_USER_ALIGN (f_args) = 1;
3869 DECL_FIELD_CONTEXT (f_skip) = record;
3870 DECL_ALIGN (f_skip) = 128;
3871 DECL_USER_ALIGN (f_skip) = 1;
3873 TREE_CHAIN (record) = type_decl;
3874 TYPE_NAME (record) = type_decl;
3875 TYPE_FIELDS (record) = f_args;
3876 TREE_CHAIN (f_args) = f_skip;
3878 /* We know this is being padded and we want it that way.  It is an
3879 internal type, so hide the warnings from the user. */
3881 warn_padded = false;
3883 layout_type (record);
3887 /* The correct type is an array type of one element. */
3888 return build_array_type (record, build_index_type (size_zero_node));
3891 /* Implement va_start by filling the va_list structure VALIST.
3892 NEXTARG points to the first anonymous stack argument.
3894 The following global variables are used to initialize
3895 the va_list structure:
3898 the CUMULATIVE_ARGS for this function
3900 crtl->args.arg_offset_rtx:
3901 holds the offset of the first anonymous stack argument
3902 (relative to the virtual arg pointer). */
3905 spu_va_start (tree valist, rtx nextarg)
3907 tree f_args, f_skip;
3910 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3911 f_skip = TREE_CHAIN (f_args);
3913 valist = build_va_arg_indirect_ref (valist);
3915 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3917 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3919 /* Find the __args area. */
3920 t = make_tree (TREE_TYPE (args), nextarg);
3921 if (crtl->args.pretend_args_size > 0)
3922 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
3923 size_int (-STACK_POINTER_OFFSET));
3924 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
3925 TREE_SIDE_EFFECTS (t) = 1;
3926 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3928 /* Find the __skip area. */
3929 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
3930 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
3931 size_int (crtl->args.pretend_args_size
3932 - STACK_POINTER_OFFSET));
3933 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
3934 TREE_SIDE_EFFECTS (t) = 1;
3935 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3938 /* Gimplify va_arg by updating the va_list structure
3939 VALIST as required to retrieve an argument of type
3940 TYPE, and returning that argument.
3942 ret = va_arg(VALIST, TYPE);
3944 generates code equivalent to:
3946 paddedsize = (sizeof(TYPE) + 15) & -16;
3947 if (VALIST.__args + paddedsize > VALIST.__skip
3948 && VALIST.__args <= VALIST.__skip)
3949 addr = VALIST.__skip + 32;
3951 addr = VALIST.__args;
3952 VALIST.__args = addr + paddedsize;
3953 ret = *(TYPE *)addr;
3956 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
3957 gimple_seq * post_p ATTRIBUTE_UNUSED)
3959 tree f_args, f_skip;
3961 HOST_WIDE_INT size, rsize;
3962 tree paddedsize, addr, tmp;
3963 bool pass_by_reference_p;
3965 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3966 f_skip = TREE_CHAIN (f_args);
3968 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3970 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3972 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3974 addr = create_tmp_var (ptr_type_node, "va_arg");
3975 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3977 /* If an object is dynamically sized, a pointer to it is passed
3978 instead of the object itself. */
3979 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
3981 if (pass_by_reference_p)
3982 type = build_pointer_type (type);
3983 size = int_size_in_bytes (type);
3984 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
3986 /* Build the conditional expression to calculate addr.  The expression
3987 will be gimplified later. */
3988 paddedsize = size_int (rsize);
3989 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
3990 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
3991 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
3992 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
3993 unshare_expr (skip)));
3995 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
3996 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
3997 size_int (32)), unshare_expr (args));
3999 gimplify_assign (addr, tmp, pre_p);
4001 /* Update VALIST.__args. */
4002 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
4003 gimplify_assign (unshare_expr (args), tmp, pre_p);
4005 addr = fold_convert (build_pointer_type (type), addr);
4007 if (pass_by_reference_p)
4008 addr = build_va_arg_indirect_ref (addr);
4010 return build_va_arg_indirect_ref (addr);
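/* A user-level sketch (not part of the build) of the va_arg rule
   expanded above: advancing __args across __skip hops over the
   32-byte register-save/back-chain area of the previous frame.  */
#if 0
#include <stdint.h>

static void *
spu_va_arg_sketch (uint8_t **args, uint8_t *skip, unsigned paddedsize)
{
  uint8_t *addr;
  if (*args + paddedsize > skip && *args <= skip)
    addr = skip + 32;		/* skip over the reserved area */
  else
    addr = *args;
  *args = addr + paddedsize;
  return addr;
}
#endif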
4013 /* Save parameter registers starting with the register that corresponds
4014 to the first unnamed parameter.  If the first unnamed parameter is
4015 on the stack then save no registers.  Set pretend_args_size to the
4016 amount of space needed to save the registers. */
4018 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4019 tree type, int *pretend_size, int no_rtl)
4028 /* cum currently points to the last named argument; we want to
4029 start at the next argument. */
4030 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
4032 offset = -STACK_POINTER_OFFSET;
4033 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4035 tmp = gen_frame_mem (V4SImode,
4036 plus_constant (virtual_incoming_args_rtx,
4038 emit_move_insn (tmp,
4039 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4042 *pretend_size = offset + STACK_POINTER_OFFSET;
4047 spu_conditional_register_usage (void)
4051 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4052 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4056 /* This is called to decide when we can simplify a load instruction. We
4057 must only return true for registers which we know will always be
4058 aligned.  We must take into account that CSE might replace this reg
4059 with another one that has not been marked aligned.
4060 So this is really only true for frame, stack and virtual registers,
4061 which we know are always aligned and should not be adversely affected
4064 regno_aligned_for_load (int regno)
4066 return regno == FRAME_POINTER_REGNUM
4067 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
4068 || regno == ARG_POINTER_REGNUM
4069 || regno == STACK_POINTER_REGNUM
4070 || (regno >= FIRST_VIRTUAL_REGISTER
4071 && regno <= LAST_VIRTUAL_REGISTER);
4074 /* Return TRUE when mem is known to be 16-byte aligned. */
4076 aligned_mem_p (rtx mem)
4078 if (MEM_ALIGN (mem) >= 128)
4080 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
4082 if (GET_CODE (XEXP (mem, 0)) == PLUS)
4084 rtx p0 = XEXP (XEXP (mem, 0), 0);
4085 rtx p1 = XEXP (XEXP (mem, 0), 1);
4086 if (regno_aligned_for_load (REGNO (p0)))
4088 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
4090 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4094 else if (GET_CODE (XEXP (mem, 0)) == REG)
4096 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
4099 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
4101 else if (GET_CODE (XEXP (mem, 0)) == CONST)
4103 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
4104 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
4105 if (GET_CODE (p0) == SYMBOL_REF
4106 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4112 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4113 into its SYMBOL_REF_FLAGS. */
4115 spu_encode_section_info (tree decl, rtx rtl, int first)
4117 default_encode_section_info (decl, rtl, first);
4119 /* If a variable has a forced alignment to < 16 bytes, mark it with
4120 SYMBOL_FLAG_ALIGN1. */
4121 if (TREE_CODE (decl) == VAR_DECL
4122 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4123 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4126 /* Return TRUE if we are certain the mem refers to a complete object
4127 which is both 16-byte aligned and padded to a 16-byte boundary. This
4128 would make it safe to store with a single instruction.
4129 We guarantee the alignment and padding for static objects by aligning
4130 all of them to 16 bytes.  (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4131 FIXME: We currently cannot guarantee this for objects on the stack
4132 because assign_parm_setup_stack calls assign_stack_local with the
4133 alignment of the parameter mode and in that case the alignment never
4134 gets adjusted by LOCAL_ALIGNMENT. */
4136 store_with_one_insn_p (rtx mem)
4138 rtx addr = XEXP (mem, 0);
4139 if (GET_MODE (mem) == BLKmode)
4141 /* Only static objects. */
4142 if (GET_CODE (addr) == SYMBOL_REF)
4144 /* We use the associated declaration to make sure the access is
4145 referring to the whole object.
4146 We check both MEM_EXPR and SYMBOL_REF_DECL.  I'm not sure
4147 if it is necessary. Will there be cases where one exists, and
4148 the other does not? Will there be cases where both exist, but
4149 have different types? */
4150 tree decl = MEM_EXPR (mem);
4152 && TREE_CODE (decl) == VAR_DECL
4153 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4155 decl = SYMBOL_REF_DECL (addr);
4157 && TREE_CODE (decl) == VAR_DECL
4158 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4165 spu_expand_mov (rtx * ops, enum machine_mode mode)
4167 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4170 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4172 rtx from = SUBREG_REG (ops[1]);
4173 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4175 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4176 && GET_MODE_CLASS (imode) == MODE_INT
4177 && subreg_lowpart_p (ops[1]));
4179 if (GET_MODE_SIZE (imode) < 4)
4181 if (imode != GET_MODE (from))
4182 from = gen_rtx_SUBREG (imode, from, 0);
4184 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4186 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
4187 emit_insn (GEN_FCN (icode) (ops[0], from));
4190 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4194 /* At least one of the operands needs to be a register. */
4195 if ((reload_in_progress | reload_completed) == 0
4196 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4198 rtx temp = force_reg (mode, ops[1]);
4199 emit_move_insn (ops[0], temp);
4202 if (reload_in_progress || reload_completed)
4204 if (CONSTANT_P (ops[1]))
4205 return spu_split_immediate (ops);
4210 if (GET_CODE (ops[0]) == MEM)
4212 if (!spu_valid_move (ops))
4214 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
4215 gen_reg_rtx (TImode)));
4219 else if (GET_CODE (ops[1]) == MEM)
4221 if (!spu_valid_move (ops))
4224 (ops[0], ops[1], gen_reg_rtx (TImode),
4225 gen_reg_rtx (SImode)));
4229 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4231 if (GET_CODE (ops[1]) == CONST_INT)
4233 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4234 if (val != INTVAL (ops[1]))
4236 emit_move_insn (ops[0], GEN_INT (val));
4245 spu_split_load (rtx * ops)
4247 enum machine_mode mode = GET_MODE (ops[0]);
4248 rtx addr, load, rot, mem, p0, p1;
4251 addr = XEXP (ops[1], 0);
4255 if (GET_CODE (addr) == PLUS)
4258 aligned reg + aligned reg => lqx
4259 aligned reg + unaligned reg => lqx, rotqby
4260 aligned reg + aligned const => lqd
4261 aligned reg + unaligned const => lqd, rotqbyi
4262 unaligned reg + aligned reg => lqx, rotqby
4263 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4264 unaligned reg + aligned const => lqd, rotqby
4265 unaligned reg + unaligned const -> not allowed by legitimate address
4267 p0 = XEXP (addr, 0);
4268 p1 = XEXP (addr, 1);
4269 if (REG_P (p0) && !regno_aligned_for_load (REGNO (p0)))
4271 if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
4273 emit_insn (gen_addsi3 (ops[3], p0, p1));
4281 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4283 rot_amt = INTVAL (p1) & 15;
4284 p1 = GEN_INT (INTVAL (p1) & -16);
4285 addr = gen_rtx_PLUS (SImode, p0, p1);
4287 else if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
4291 else if (GET_CODE (addr) == REG)
4293 if (!regno_aligned_for_load (REGNO (addr)))
4296 else if (GET_CODE (addr) == CONST)
4298 if (GET_CODE (XEXP (addr, 0)) == PLUS
4299 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4300 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4302 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4304 addr = gen_rtx_CONST (Pmode,
4305 gen_rtx_PLUS (Pmode,
4306 XEXP (XEXP (addr, 0), 0),
4307 GEN_INT (rot_amt & -16)));
4309 addr = XEXP (XEXP (addr, 0), 0);
4314 else if (GET_CODE (addr) == CONST_INT)
4316 rot_amt = INTVAL (addr);
4317 addr = GEN_INT (rot_amt & -16);
4319 else if (!ALIGNED_SYMBOL_REF_P (addr))
4322 if (GET_MODE_SIZE (mode) < 4)
4323 rot_amt += GET_MODE_SIZE (mode) - 4;
4329 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
4336 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4337 mem = change_address (ops[1], TImode, addr);
4339 emit_insn (gen_movti (load, mem));
4342 emit_insn (gen_rotqby_ti (load, load, rot));
4344 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
4346 if (reload_completed)
4347 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
4349 emit_insn (gen_spu_convert (ops[0], load));
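/* In plain C, the split above loads the aligned quadword containing
   the address and rotates the wanted bytes into the preferred slot.
   A standalone sketch (not part of the build; assumes big-endian
   order as on the SPU and a 4-byte-aligned P so the word cannot
   straddle two quadwords):  */
#if 0
#include <stdint.h>
#include <string.h>

static uint32_t
load_u32_via_quadword (const uint8_t *p)
{
  const uint8_t *q = (const uint8_t *) ((uintptr_t) p & ~(uintptr_t) 15);
  uint8_t quad[16], rot[16];
  unsigned amt = (uintptr_t) p & 15, i;
  memcpy (quad, q, 16);			/* lq: load the aligned quadword */
  for (i = 0; i < 16; i++)		/* rotqby: rotate left by AMT bytes */
    rot[i] = quad[(i + amt) & 15];
  return ((uint32_t) rot[0] << 24) | (rot[1] << 16)
	 | (rot[2] << 8) | rot[3];	/* value now in the preferred slot */
}
#endif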
4353 spu_split_store (rtx * ops)
4355 enum machine_mode mode = GET_MODE (ops[0]);
4358 rtx addr, p0, p1, p1_lo, smem;
4362 addr = XEXP (ops[0], 0);
4364 if (GET_CODE (addr) == PLUS)
4367 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4368 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4369 aligned reg + aligned const => lqd, c?d, shuf, stqx
4370 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4371 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4372 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4373 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4374 unaligned reg + unaligned const -> not allowed by legitimate address
4377 p0 = XEXP (addr, 0);
4378 p1 = p1_lo = XEXP (addr, 1);
4379 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
4381 p1_lo = GEN_INT (INTVAL (p1) & 15);
4382 p1 = GEN_INT (INTVAL (p1) & -16);
4383 addr = gen_rtx_PLUS (SImode, p0, p1);
4386 else if (GET_CODE (addr) == REG)
4390 p1 = p1_lo = const0_rtx;
4395 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4396 p1 = 0; /* aform doesn't use p1 */
4398 if (ALIGNED_SYMBOL_REF_P (addr))
4400 else if (GET_CODE (addr) == CONST)
4402 if (GET_CODE (XEXP (addr, 0)) == PLUS
4403 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4404 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4406 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4408 addr = gen_rtx_CONST (Pmode,
4409 gen_rtx_PLUS (Pmode,
4410 XEXP (XEXP (addr, 0), 0),
4411 GEN_INT (v & -16)));
4413 addr = XEXP (XEXP (addr, 0), 0);
4414 p1_lo = GEN_INT (v & 15);
4417 else if (GET_CODE (addr) == CONST_INT)
4419 p1_lo = GEN_INT (INTVAL (addr) & 15);
4420 addr = GEN_INT (INTVAL (addr) & -16);
4424 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4426 scalar = store_with_one_insn_p (ops[0]);
4429 /* We could copy the flags from the ops[0] MEM to mem here, but
4430 we don't because we want this load to be optimized away if
4431 possible, and copying the flags will prevent that in certain
4432 cases, e.g. consider the volatile flag. */
4434 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4435 set_mem_alias_set (lmem, 0);
4436 emit_insn (gen_movti (reg, lmem));
4438 if (!p0 || regno_aligned_for_load (REGNO (p0)))
4439 p0 = stack_pointer_rtx;
4443 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4444 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4446 else if (reload_completed)
4448 if (GET_CODE (ops[1]) == REG)
4449 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
4450 else if (GET_CODE (ops[1]) == SUBREG)
4451 emit_move_insn (reg,
4452 gen_rtx_REG (GET_MODE (reg),
4453 REGNO (SUBREG_REG (ops[1]))));
4459 if (GET_CODE (ops[1]) == REG)
4460 emit_insn (gen_spu_convert (reg, ops[1]));
4461 else if (GET_CODE (ops[1]) == SUBREG)
4462 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4467 if (GET_MODE_SIZE (mode) < 4 && scalar)
4468 emit_insn (gen_shlqby_ti
4469 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
4471 smem = change_address (ops[0], TImode, addr);
4472 /* We can't use the previous alias set because the memory has changed
4473 size and can potentially overlap objects of other types. */
4474 set_mem_alias_set (smem, 0);
4476 emit_insn (gen_movti (smem, reg));
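/* The matching read-modify-write store in plain C (a sketch, not part
   of the build; assumes P is aligned to the value's size so the bytes
   stay within one quadword).  The lq/c?d/shufb/stq sequence emitted
   above has this net effect on memory:  */
#if 0
#include <stdint.h>

static void
store_u32_via_quadword (uint8_t *p, uint32_t v)
{
  uint8_t *q = (uint8_t *) ((uintptr_t) p & ~(uintptr_t) 15);
  unsigned off = (uintptr_t) p & 15;
  q[off + 0] = v >> 24;		/* big-endian insert into the quadword */
  q[off + 1] = v >> 16;
  q[off + 2] = v >> 8;
  q[off + 3] = v;
}
#endif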
4479 /* Return TRUE if X is MEM which is a struct member reference
4480 and the member can safely be loaded and stored with a single
4481 instruction because it is padded. */
4483 mem_is_padded_component_ref (rtx x)
4485 tree t = MEM_EXPR (x);
4487 if (!t || TREE_CODE (t) != COMPONENT_REF)
4489 t = TREE_OPERAND (t, 1);
4490 if (!t || TREE_CODE (t) != FIELD_DECL
4491 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4493 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4494 r = DECL_FIELD_CONTEXT (t);
4495 if (!r || TREE_CODE (r) != RECORD_TYPE)
4497 /* Make sure they are the same mode */
4498 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4500 /* If there are no following fields then the field alignment assures
4501 the structure is padded to the alignment which means this field is
4503 if (TREE_CHAIN (t) == 0)
4505 /* If the following field is also aligned then this field will be
4508 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4513 /* Parse the -mfixed-range= option string. */
4515 fix_range (const char *const_str)
4518 char *str, *dash, *comma;
4520 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4521 REG2 are either register names or register numbers. The effect
4522 of this option is to mark the registers in the range from REG1 to
4523 REG2 as ``fixed'' so they won't be used by the compiler. */
4525 i = strlen (const_str);
4526 str = (char *) alloca (i + 1);
4527 memcpy (str, const_str, i + 1);
4531 dash = strchr (str, '-');
4534 warning (0, "value of -mfixed-range must have form REG1-REG2");
4538 comma = strchr (dash + 1, ',');
4542 first = decode_reg_name (str);
4545 warning (0, "unknown register name: %s", str);
4549 last = decode_reg_name (dash + 1);
4552 warning (0, "unknown register name: %s", dash + 1);
4560 warning (0, "%s-%s is an empty range", str, dash + 1);
4564 for (i = first; i <= last; ++i)
4565 fixed_regs[i] = call_used_regs[i] = 1;
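/* Usage sketch (the range below is just an illustration):

     spu-gcc -mfixed-range=80-127 -c foo.c

   marks registers 80 through 127 fixed and call-used, so the register
   allocator never assigns them.  */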
4576 spu_valid_move (rtx * ops)
4578 enum machine_mode mode = GET_MODE (ops[0]);
4579 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4582 /* init_expr_once tries to recog against load and store insns to set
4583 the direct_load[] and direct_store[] arrays. We always want to
4584 consider those loads and stores valid. init_expr_once is called in
4585 the context of a dummy function which does not have a decl. */
4586 if (cfun->decl == 0)
4589 /* Don't allow loads/stores which would require more than 1 insn.
4590 During and after reload we assume loads and stores only take 1
4592 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
4594 if (GET_CODE (ops[0]) == MEM
4595 && (GET_MODE_SIZE (mode) < 4
4596 || !(store_with_one_insn_p (ops[0])
4597 || mem_is_padded_component_ref (ops[0]))))
4599 if (GET_CODE (ops[1]) == MEM
4600 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
4606 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4607 can be generated using the fsmbi instruction. */
4609 fsmbi_const_p (rtx x)
4613 /* We can always choose TImode for CONST_INT because the high bits
4614 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4615 enum immediate_class c = classify_immediate (x, TImode);
4616 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
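/* For reference, a standalone sketch (not part of the build) of the
   fsmbi semantics this test relies on: each of the 16 immediate bits
   selects 0x00 or 0xff for one result byte, so only constants whose
   bytes are all 0x00 or 0xff are reachable with a single fsmbi.  */
#if 0
#include <stdint.h>

static void
fsmbi_sketch (uint16_t imm, uint8_t out[16])
{
  int i;
  for (i = 0; i < 16; i++)
    out[i] = (imm & (0x8000 >> i)) ? 0xff : 0x00;
  /* e.g. imm == 0xf0f0 yields ff ff ff ff 00 00 00 00
			       ff ff ff ff 00 00 00 00.  */
}
#endif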
4621 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4622 can be generated using the cbd, chd, cwd or cdd instruction. */
4624 cpat_const_p (rtx x, enum machine_mode mode)
4628 enum immediate_class c = classify_immediate (x, mode);
4629 return c == IC_CPAT;
4635 gen_cpat_const (rtx * ops)
4637 unsigned char dst[16];
4638 int i, offset, shift, isize;
4639 if (GET_CODE (ops[3]) != CONST_INT
4640 || GET_CODE (ops[2]) != CONST_INT
4641 || (GET_CODE (ops[1]) != CONST_INT
4642 && GET_CODE (ops[1]) != REG))
4644 if (GET_CODE (ops[1]) == REG
4645 && (!REG_POINTER (ops[1])
4646 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4649 for (i = 0; i < 16; i++)
4651 isize = INTVAL (ops[3]);
4654 else if (isize == 2)
4658 offset = (INTVAL (ops[2]) +
4659 (GET_CODE (ops[1]) ==
4660 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4661 for (i = 0; i < isize; i++)
4662 dst[offset + i] = i + shift;
4663 return array_to_constant (TImode, dst);
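/* For example (a sketch of our reading of the code above, not part of
   the build), the cwd-style pattern for a word inserted at offset 4
   is the identity selectors 0x10..0x1f with 00 01 02 03 marking the
   insertion slot:  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned char dst[16];
  int i, offset = 4, isize = 4, shift = 0;	/* cwd at offset 4 */
  for (i = 0; i < 16; i++)
    dst[i] = i + 16;				/* identity selectors */
  for (i = 0; i < isize; i++)
    dst[offset + i] = i + shift;		/* mark the insertion slot */
  for (i = 0; i < 16; i++)
    printf ("%02x ", dst[i]);
  printf ("\n");	/* 10 11 12 13 00 01 02 03 18 19 1a 1b 1c 1d 1e 1f */
  return 0;
}
#endif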
4666 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4667 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4668 than 16 bytes, the value is repeated across the rest of the array. */
4670 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
4675 memset (arr, 0, 16);
4676 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4677 if (GET_CODE (x) == CONST_INT
4678 || (GET_CODE (x) == CONST_DOUBLE
4679 && (mode == SFmode || mode == DFmode)))
4681 gcc_assert (mode != VOIDmode && mode != BLKmode);
4683 if (GET_CODE (x) == CONST_DOUBLE)
4684 val = const_double_to_hwint (x);
4687 first = GET_MODE_SIZE (mode) - 1;
4688 for (i = first; i >= 0; i--)
4690 arr[i] = val & 0xff;
4693 /* Splat the constant across the whole array. */
4694 for (j = 0, i = first + 1; i < 16; i++)
4697 j = (j == first) ? 0 : j + 1;
4700 else if (GET_CODE (x) == CONST_DOUBLE)
4702 val = CONST_DOUBLE_LOW (x);
4703 for (i = 15; i >= 8; i--)
4705 arr[i] = val & 0xff;
4708 val = CONST_DOUBLE_HIGH (x);
4709 for (i = 7; i >= 0; i--)
4711 arr[i] = val & 0xff;
4715 else if (GET_CODE (x) == CONST_VECTOR)
4719 mode = GET_MODE_INNER (mode);
4720 units = CONST_VECTOR_NUNITS (x);
4721 for (i = 0; i < units; i++)
4723 elt = CONST_VECTOR_ELT (x, i);
4724 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4726 if (GET_CODE (elt) == CONST_DOUBLE)
4727 val = const_double_to_hwint (elt);
4730 first = GET_MODE_SIZE (mode) - 1;
4731 if (first + i * GET_MODE_SIZE (mode) > 16)
4733 for (j = first; j >= 0; j--)
4735 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4745 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
4746 smaller than 16 bytes, use the bytes that would represent that value
4747 in a register, e.g., for QImode return the value of arr[3]. */
4749 array_to_constant (enum machine_mode mode, unsigned char arr[16])
4751 enum machine_mode inner_mode;
4753 int units, size, i, j, k;
4756 if (GET_MODE_CLASS (mode) == MODE_INT
4757 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4759 j = GET_MODE_SIZE (mode);
4760 i = j < 4 ? 4 - j : 0;
4761 for (val = 0; i < j; i++)
4762 val = (val << 8) | arr[i];
4763 val = trunc_int_for_mode (val, mode);
4764 return GEN_INT (val);
4770 for (i = high = 0; i < 8; i++)
4771 high = (high << 8) | arr[i];
4772 for (i = 8, val = 0; i < 16; i++)
4773 val = (val << 8) | arr[i];
4774 return immed_double_const (val, high, TImode);
4778 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4779 val = trunc_int_for_mode (val, SImode);
4780 return hwint_to_const_double (SFmode, val);
4784 for (i = 0, val = 0; i < 8; i++)
4785 val = (val << 8) | arr[i];
4786 return hwint_to_const_double (DFmode, val);
4789 if (!VECTOR_MODE_P (mode))
4792 units = GET_MODE_NUNITS (mode);
4793 size = GET_MODE_UNIT_SIZE (mode);
4794 inner_mode = GET_MODE_INNER (mode);
4795 v = rtvec_alloc (units);
4797 for (k = i = 0; i < units; ++i)
4800 for (j = 0; j < size; j++, k++)
4801 val = (val << 8) | arr[k];
4803 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4804 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4806 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4811 return gen_rtx_CONST_VECTOR (mode, v);
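/* A standalone sketch (not part of the build) of the splat rule used
   by constant_to_array for modes smaller than 16 bytes:  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned char arr[16];
  unsigned val = 0x1234;	/* a HImode constant */
  int size = 2, i;
  for (i = 0; i < 16; i++)	/* repeat the value across the array */
    arr[i] = (val >> (8 * (size - 1 - i % size))) & 0xff;
  for (i = 0; i < 16; i++)
    printf ("%02x ", arr[i]);
  printf ("\n");		/* 12 34 12 34 ... out to 16 bytes */
  return 0;
}
#endif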
4815 reloc_diagnostic (rtx x)
4817 tree loc_decl, decl = 0;
4819 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4822 if (GET_CODE (x) == SYMBOL_REF)
4823 decl = SYMBOL_REF_DECL (x);
4824 else if (GET_CODE (x) == CONST
4825 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4826 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4828 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4829 if (decl && !DECL_P (decl))
4832 /* We use last_assemble_variable_decl to get line information. It's
4833 not always going to be right and might not even be close, but it will
4834 be right for the more common cases. */
4835 if (!last_assemble_variable_decl || in_section == ctors_section)
4838 loc_decl = last_assemble_variable_decl;
4840 /* The decl could be a string constant. */
4841 if (decl && DECL_P (decl))
4842 msg = "%Jcreating run-time relocation for %qD";
4844 msg = "creating run-time relocation";
4846 if (TARGET_WARN_RELOC)
4847 warning (0, msg, loc_decl, decl);
4849 error (msg, loc_decl, decl);
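/* Illustrative trigger (editor's sketch, hypothetical user code): with
   -fpic, an initialized pointer such as

     extern int x;
     int *p = &x;

   requires a run-time relocation, which is reported here as a warning
   or an error depending on the -mwarn-reloc / -merror-reloc options. */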
4852 /* Hook into assemble_integer so we can generate an error for run-time
4853 relocations. The SPU ABI disallows them. */
4855 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4857 /* By default run-time relocations aren't supported, but we allow them
4858 in case users support them in their own run-time loader. And we provide
4859 a warning for those users that don't. */
4860 if ((GET_CODE (x) == SYMBOL_REF)
4861 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4862 reloc_diagnostic (x);
4864 return default_assemble_integer (x, size, aligned_p);
4868 spu_asm_globalize_label (FILE * file, const char *name)
4870 fputs ("\t.global\t", file);
4871 assemble_name (file, name);
4876 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
4877 bool speed ATTRIBUTE_UNUSED)
4879 enum machine_mode mode = GET_MODE (x);
4880 int cost = COSTS_N_INSNS (2);
4882 /* Folding to a CONST_VECTOR will use extra space but there might
4883 be only a small savings in cycles. We'd like to use a CONST_VECTOR
4884 only if it allows us to fold away multiple insns. Changing the cost
4885 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
4886 because this cost will only be compared against a single insn.
4887 if (code == CONST_VECTOR)
4888 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
4891 /* Use defaults for float operations. Not accurate but good enough. */
4894 *total = COSTS_N_INSNS (13);
4899 *total = COSTS_N_INSNS (6);
4905 if (satisfies_constraint_K (x))
4907 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
4908 *total = COSTS_N_INSNS (1);
4910 *total = COSTS_N_INSNS (3);
4914 *total = COSTS_N_INSNS (3);
4919 *total = COSTS_N_INSNS (0);
4923 *total = COSTS_N_INSNS (5);
4927 case FLOAT_TRUNCATE:
4929 case UNSIGNED_FLOAT:
4932 *total = COSTS_N_INSNS (7);
4938 *total = COSTS_N_INSNS (9);
4945 GET_CODE (XEXP (x, 0)) ==
4946 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
4947 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
4949 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4951 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4952 cost = COSTS_N_INSNS (14);
4953 if ((val & 0xffff) == 0)
4954 cost = COSTS_N_INSNS (9);
4955 else if (val > 0 && val < 0x10000)
4956 cost = COSTS_N_INSNS (11);
4965 *total = COSTS_N_INSNS (20);
4972 *total = COSTS_N_INSNS (4);
4975 if (XINT (x, 1) == UNSPEC_CONVERT)
4976 *total = COSTS_N_INSNS (0);
4978 *total = COSTS_N_INSNS (4);
4981 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
4982 if (GET_MODE_CLASS (mode) == MODE_INT
4983 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
4984 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
4985 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
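/* Editor's note: the scaling above squares the word count, e.g. a DImode
   operation (8 bytes) costs 2 * 2 = 4 times its SImode cost and a TImode
   operation 16 times as much, approximating the extra instructions needed
   for multi-word arithmetic. */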
4990 static enum machine_mode
4991 spu_unwind_word_mode (void)
4996 /* Decide whether we can make a sibling call to a function. DECL is the
4997 declaration of the function being targeted by the call and EXP is the
4998 CALL_EXPR representing the call. */
5000 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5002 return decl && !TARGET_LARGE_MEM;
5005 /* We need to correctly update the back chain pointer and the Available
5006 Stack Size (which is in the second slot of the sp register). */
5008 spu_allocate_stack (rtx op0, rtx op1)
5011 rtx chain = gen_reg_rtx (V4SImode);
5012 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5013 rtx sp = gen_reg_rtx (V4SImode);
5014 rtx splatted = gen_reg_rtx (V4SImode);
5015 rtx pat = gen_reg_rtx (TImode);
5017 /* Copy the back chain so we can save it back again. */
5018 emit_move_insn (chain, stack_bot);
5020 op1 = force_reg (SImode, op1);
5022 v = 0x1020300010203ll;
5023 emit_move_insn (pat, immed_double_const (v, v, TImode));
5024 emit_insn (gen_shufb (splatted, op1, op1, pat));
5026 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5027 emit_insn (gen_subv4si3 (sp, sp, splatted));
5029 if (flag_stack_check)
5031 rtx avail = gen_reg_rtx (SImode);
5032 rtx result = gen_reg_rtx (SImode);
5033 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5034 emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
5035 emit_insn (gen_spu_heq (result, GEN_INT (0)));
5038 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5040 emit_move_insn (stack_bot, chain);
5042 emit_move_insn (op0, virtual_stack_dynamic_rtx);
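/* Editor's note: the TImode constant 0x00010203_00010203_00010203_00010203
   is a shufb control word in which each 00 01 02 03 group selects bytes
   0-3 of OP1, so SPLATTED holds the 32-bit allocation size in all four
   slots. The single vector subtract then updates both the stack pointer
   (slot 0) and the Available Stack Size (slot 1). */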
5046 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5048 static unsigned char arr[16] =
5049 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5050 rtx temp = gen_reg_rtx (SImode);
5051 rtx temp2 = gen_reg_rtx (SImode);
5052 rtx temp3 = gen_reg_rtx (V4SImode);
5053 rtx temp4 = gen_reg_rtx (V4SImode);
5054 rtx pat = gen_reg_rtx (TImode);
5055 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5057 /* Restore the backchain from the first word, sp from the second. */
5058 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5059 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5061 emit_move_insn (pat, array_to_constant (TImode, arr));
5063 /* Compute Available Stack Size for sp */
5064 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5065 emit_insn (gen_shufb (temp3, temp, temp, pat));
5067 /* Compute Available Stack Size for back chain */
5068 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5069 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5070 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5072 emit_insn (gen_addv4si3 (sp, sp, temp3));
5073 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
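/* Editor's note: ARR is the shufb pattern { 0, 1, 2, 3 } repeated four
   times, i.e. "broadcast word 0", so gen_shufb (t, x, x, pat) splats an
   SImode value across all four V4SImode slots; the vector adds then
   rebuild the new $sp and the back-chain quadword stored at the bottom
   of the stack. The same pattern is used by spu_restore_stack_block
   below. */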
5077 spu_init_libfuncs (void)
5079 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5080 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5081 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5082 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5083 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5084 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5085 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5086 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5087 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5088 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5089 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5091 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5092 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5094 set_optab_libfunc (smul_optab, TImode, "__multi3");
5095 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5096 set_optab_libfunc (smod_optab, TImode, "__modti3");
5097 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5098 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5099 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
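/* Illustrative effect (editor's sketch, hypothetical source):

     long long mul64 (long long a, long long b) { return a * b; }

   expands to a call to the __muldi3 routine registered above rather
   than inline code, since the SPU has no full 64-bit integer
   multiply. */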
5102 /* Make a subreg, stripping any existing subreg. We could possibly just
5103 call simplify_subreg, but in this case we know what we want. */
5105 spu_gen_subreg (enum machine_mode mode, rtx x)
5107 if (GET_CODE (x) == SUBREG)
5109 if (GET_MODE (x) == mode)
5111 return gen_rtx_SUBREG (mode, x, 0);
5115 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5117 return (TYPE_MODE (type) == BLKmode
5119 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5120 || int_size_in_bytes (type) >
5121 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5124 /* Create the built-in types and functions. */
5126 enum spu_function_code
5128 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5129 #include "spu-builtins.def"
5134 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5136 struct spu_builtin_description spu_builtins[] = {
5137 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5138 {fcode, icode, name, type, params, NULL_TREE},
5139 #include "spu-builtins.def"
5144 spu_init_builtins (void)
5146 struct spu_builtin_description *d;
5149 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5150 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5151 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5152 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5153 V4SF_type_node = build_vector_type (float_type_node, 4);
5154 V2DF_type_node = build_vector_type (double_type_node, 2);
5156 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5157 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5158 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5159 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5161 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5163 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5164 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5165 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5166 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5167 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5168 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5169 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5170 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5171 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5172 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5173 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5174 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5176 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5177 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5178 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5179 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5180 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5181 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5182 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5183 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5185 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5186 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5188 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5190 spu_builtin_types[SPU_BTI_PTR] =
5191 build_pointer_type (build_qualified_type
5193 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5195 /* For each builtin we build a new prototype. The tree code will make
5196 sure nodes are shared. */
5197 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5200 char name[64]; /* build_function will make a copy. */
5206 /* Find last parm. */
5207 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5212 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5214 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5216 sprintf (name, "__builtin_%s", d->name);
5218 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
5220 if (d->fcode == SPU_MASK_FOR_LOAD)
5221 TREE_READONLY (d->fndecl) = 1;
5223 /* These builtins don't throw. */
5224 TREE_NOTHROW (d->fndecl) = 1;
5229 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5231 static unsigned char arr[16] =
5232 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5234 rtx temp = gen_reg_rtx (Pmode);
5235 rtx temp2 = gen_reg_rtx (V4SImode);
5236 rtx temp3 = gen_reg_rtx (V4SImode);
5237 rtx pat = gen_reg_rtx (TImode);
5238 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5240 emit_move_insn (pat, array_to_constant (TImode, arr));
5242 /* Restore the sp. */
5243 emit_move_insn (temp, op1);
5244 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5246 /* Compute available stack size for sp. */
5247 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5248 emit_insn (gen_shufb (temp3, temp, temp, pat));
5250 emit_insn (gen_addv4si3 (sp, sp, temp3));
5251 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5255 spu_safe_dma (HOST_WIDE_INT channel)
5257 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5261 spu_builtin_splats (rtx ops[])
5263 enum machine_mode mode = GET_MODE (ops[0]);
5264 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5266 unsigned char arr[16];
5267 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5268 emit_move_insn (ops[0], array_to_constant (mode, arr));
5272 rtx reg = gen_reg_rtx (TImode);
5274 if (GET_CODE (ops[1]) != REG
5275 && GET_CODE (ops[1]) != SUBREG)
5276 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5282 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5288 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5293 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5298 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5304 emit_move_insn (reg, shuf);
5305 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
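/* Editor's note: the four shufb control constants above correspond to
   the element sizes. The 8-byte pattern copies bytes 0-7 of OP1 into
   both halves; the 4-byte pattern 00 01 02 03 (repeated) copies the
   SImode preferred slot into every word; the 2-byte and 1-byte patterns
   use bytes 2-3 and byte 3 respectively, which is where the SPU keeps
   narrow scalars in a register. */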
5310 spu_builtin_extract (rtx ops[])
5312 enum machine_mode mode;
5315 mode = GET_MODE (ops[1]);
5317 if (GET_CODE (ops[2]) == CONST_INT)
5322 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5325 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5328 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5331 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5334 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5337 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5345 from = spu_gen_subreg (TImode, ops[1]);
5346 rot = gen_reg_rtx (TImode);
5347 tmp = gen_reg_rtx (SImode);
5352 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5355 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5356 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5360 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5364 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5369 emit_insn (gen_rotqby_ti (rot, from, tmp));
5371 emit_insn (gen_spu_convert (ops[0], rot));
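/* Editor's note: in the variable-index path, TMP is the left byte
   rotate that brings element N into the preferred slot before rotqby,
   e.g. (illustrative)

     V16QImode:  tmp = n - 3        byte n       -> byte 3
     V8HImode:   tmp = 2*n - 2      bytes 2n..   -> bytes 2-3
     V4SImode:   tmp = 4*n          bytes 4n..   -> bytes 0-3

   after which spu_convert extracts the scalar from the rotated
   quadword. */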
5375 spu_builtin_insert (rtx ops[])
5377 enum machine_mode mode = GET_MODE (ops[0]);
5378 enum machine_mode imode = GET_MODE_INNER (mode);
5379 rtx mask = gen_reg_rtx (TImode);
5382 if (GET_CODE (ops[3]) == CONST_INT)
5383 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5386 offset = gen_reg_rtx (SImode);
5387 emit_insn (gen_mulsi3
5388 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5391 (mask, stack_pointer_rtx, offset,
5392 GEN_INT (GET_MODE_SIZE (imode))));
5393 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5397 spu_builtin_promote (rtx ops[])
5399 enum machine_mode mode, imode;
5400 rtx rot, from, offset;
5403 mode = GET_MODE (ops[0]);
5404 imode = GET_MODE_INNER (mode);
5406 from = gen_reg_rtx (TImode);
5407 rot = spu_gen_subreg (TImode, ops[0]);
5409 emit_insn (gen_spu_convert (from, ops[1]));
5411 if (GET_CODE (ops[2]) == CONST_INT)
5413 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5414 if (GET_MODE_SIZE (imode) < 4)
5415 pos += 4 - GET_MODE_SIZE (imode);
5416 offset = GEN_INT (pos & 15);
5420 offset = gen_reg_rtx (SImode);
5424 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5427 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5428 emit_insn (gen_addsi3 (offset, offset, offset));
5432 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5433 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5437 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5443 emit_insn (gen_rotqby_ti (rot, from, offset));
5447 spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
5449 rtx shuf = gen_reg_rtx (V4SImode);
5450 rtx insn = gen_reg_rtx (V4SImode);
5455 fnaddr = force_reg (SImode, fnaddr);
5456 cxt = force_reg (SImode, cxt);
5458 if (TARGET_LARGE_MEM)
5460 rtx rotl = gen_reg_rtx (V4SImode);
5461 rtx mask = gen_reg_rtx (V4SImode);
5462 rtx bi = gen_reg_rtx (SImode);
5463 unsigned char shufa[16] = {
5464 2, 3, 0, 1, 18, 19, 16, 17,
5465 0, 1, 2, 3, 16, 17, 18, 19
5467 unsigned char insna[16] = {
5469 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5471 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5474 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5475 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5477 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5478 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5479 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5480 emit_insn (gen_selb (insn, insnc, rotl, mask));
5482 mem = memory_address (Pmode, tramp);
5483 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5485 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5486 mem = memory_address (Pmode, plus_constant (tramp, 16));
5487 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
5491 rtx scxt = gen_reg_rtx (SImode);
5492 rtx sfnaddr = gen_reg_rtx (SImode);
5493 unsigned char insna[16] = {
5494 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5500 shufc = gen_reg_rtx (TImode);
5501 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5503 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5504 fits 18 bits and the last 4 are zeros. This will be true if
5505 the stack pointer is initialized to 0x3fff0 at program start;
5506 otherwise the ila instruction will be garbage. */
5508 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5509 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5511 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5512 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5513 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5515 mem = memory_address (Pmode, tramp);
5516 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5519 emit_insn (gen_sync ());
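/* Editor's note, illustrating the ila assumption documented above: ila
   keeps its 18-bit immediate at bits 7..24, and SCXT is CXT << 7, so
   the ior produces a valid instruction only while CXT fits in 18 bits;
   e.g. cxt == 0x3fff0 gives scxt == 0x1fff800, which stays inside the
   immediate field of the 0x42 opcode word. */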
5523 spu_expand_sign_extend (rtx ops[])
5525 unsigned char arr[16];
5526 rtx pat = gen_reg_rtx (TImode);
5529 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5530 if (GET_MODE (ops[1]) == QImode)
5532 sign = gen_reg_rtx (HImode);
5533 emit_insn (gen_extendqihi2 (sign, ops[1]));
5534 for (i = 0; i < 16; i++)
5540 for (i = 0; i < 16; i++)
5542 switch (GET_MODE (ops[1]))
5545 sign = gen_reg_rtx (SImode);
5546 emit_insn (gen_extendhisi2 (sign, ops[1]));
5548 arr[last - 1] = 0x02;
5551 sign = gen_reg_rtx (SImode);
5552 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5553 for (i = 0; i < 4; i++)
5554 arr[last - i] = 3 - i;
5557 sign = gen_reg_rtx (SImode);
5558 c = gen_reg_rtx (SImode);
5559 emit_insn (gen_spu_convert (c, ops[1]));
5560 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5561 for (i = 0; i < 8; i++)
5562 arr[last - i] = 7 - i;
5568 emit_move_insn (pat, array_to_constant (TImode, arr));
5569 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
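/* Editor's note: for the SImode -> DImode case the pattern built above
   is, illustratively (assuming the elided initial loop fills ARR with
   selectors for the sign operand),

     arr = { s, s, s, s, 0x00, 0x01, 0x02, 0x03, ... }

   so bytes 4-7 of the result take the original 32-bit value
   (arr[last - i] = 3 - i with last == 7) and bytes 0-3 take the sign
   word produced by the ashrsi3. */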
5572 /* Expand vector initialization. If there are any constant parts,
5573 load the constant parts first, then load any non-constant parts. */
5575 spu_expand_vector_init (rtx target, rtx vals)
5577 enum machine_mode mode = GET_MODE (target);
5578 int n_elts = GET_MODE_NUNITS (mode);
5580 bool all_same = true;
5581 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5584 first = XVECEXP (vals, 0, 0);
5585 for (i = 0; i < n_elts; ++i)
5587 x = XVECEXP (vals, 0, i);
5588 if (!(CONST_INT_P (x)
5589 || GET_CODE (x) == CONST_DOUBLE
5590 || GET_CODE (x) == CONST_FIXED))
5594 if (first_constant == NULL_RTX)
5597 if (i > 0 && !rtx_equal_p (x, first))
5601 /* If all elements are the same, use splats to repeat them. */
5604 if (!CONSTANT_P (first)
5605 && !register_operand (first, GET_MODE (x)))
5606 first = force_reg (GET_MODE (first), first);
5607 emit_insn (gen_spu_splats (target, first));
5611 /* Load constant parts. */
5612 if (n_var != n_elts)
5616 emit_move_insn (target,
5617 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5621 rtx constant_parts_rtx = copy_rtx (vals);
5623 gcc_assert (first_constant != NULL_RTX);
5624 /* Fill empty slots with the first constant; this increases
5625 our chance of using splats in the recursive call below. */
5626 for (i = 0; i < n_elts; ++i)
5628 x = XVECEXP (constant_parts_rtx, 0, i);
5629 if (!(CONST_INT_P (x)
5630 || GET_CODE (x) == CONST_DOUBLE
5631 || GET_CODE (x) == CONST_FIXED))
5632 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
5635 spu_expand_vector_init (target, constant_parts_rtx);
5639 /* Load variable parts. */
5642 rtx insert_operands[4];
5644 insert_operands[0] = target;
5645 insert_operands[2] = target;
5646 for (i = 0; i < n_elts; ++i)
5648 x = XVECEXP (vals, 0, i);
5649 if (!(CONST_INT_P (x)
5650 || GET_CODE (x) == CONST_DOUBLE
5651 || GET_CODE (x) == CONST_FIXED))
5653 if (!register_operand (x, GET_MODE (x)))
5654 x = force_reg (GET_MODE (x), x);
5655 insert_operands[1] = x;
5656 insert_operands[3] = GEN_INT (i);
5657 spu_builtin_insert (insert_operands);
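/* Illustrative example (editor's sketch): initializing a V4SFmode
   vector with { x, 1.0f, 2.0f, 3.0f }, where X lives in a register,
   first moves the constant vector { 1.0f, 1.0f, 2.0f, 3.0f } into the
   target (the variable slot filled with the first constant, as noted
   above) and then emits one spu_builtin_insert of X into element 0. */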
5663 /* Return the insn index of the vector compare instruction for the given
5664 CODE, DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
5667 get_vec_cmp_insn (enum rtx_code code,
5668 enum machine_mode dest_mode,
5669 enum machine_mode op_mode)
5675 if (dest_mode == V16QImode && op_mode == V16QImode)
5676 return CODE_FOR_ceq_v16qi;
5677 if (dest_mode == V8HImode && op_mode == V8HImode)
5678 return CODE_FOR_ceq_v8hi;
5679 if (dest_mode == V4SImode && op_mode == V4SImode)
5680 return CODE_FOR_ceq_v4si;
5681 if (dest_mode == V4SImode && op_mode == V4SFmode)
5682 return CODE_FOR_ceq_v4sf;
5683 if (dest_mode == V2DImode && op_mode == V2DFmode)
5684 return CODE_FOR_ceq_v2df;
5687 if (dest_mode == V16QImode && op_mode == V16QImode)
5688 return CODE_FOR_cgt_v16qi;
5689 if (dest_mode == V8HImode && op_mode == V8HImode)
5690 return CODE_FOR_cgt_v8hi;
5691 if (dest_mode == V4SImode && op_mode == V4SImode)
5692 return CODE_FOR_cgt_v4si;
5693 if (dest_mode == V4SImode && op_mode == V4SFmode)
5694 return CODE_FOR_cgt_v4sf;
5695 if (dest_mode == V2DImode && op_mode == V2DFmode)
5696 return CODE_FOR_cgt_v2df;
5699 if (dest_mode == V16QImode && op_mode == V16QImode)
5700 return CODE_FOR_clgt_v16qi;
5701 if (dest_mode == V8HImode && op_mode == V8HImode)
5702 return CODE_FOR_clgt_v8hi;
5703 if (dest_mode == V4SImode && op_mode == V4SImode)
5704 return CODE_FOR_clgt_v4si;
5712 /* Emit a vector compare for operands OP0 and OP1 using code RCODE.
5713 DMODE is the expected destination mode. This is a recursive function. */
5716 spu_emit_vector_compare (enum rtx_code rcode,
5718 enum machine_mode dmode)
5722 enum machine_mode dest_mode;
5723 enum machine_mode op_mode = GET_MODE (op1);
5725 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5727 /* Floating point vector compare instructions use destination V4SImode.
5728 Double floating point vector compare instructions use destination V2DImode.
5729 Move the destination to the appropriate mode later. */
5730 if (dmode == V4SFmode)
5731 dest_mode = V4SImode;
5732 else if (dmode == V2DFmode)
5733 dest_mode = V2DImode;
5737 mask = gen_reg_rtx (dest_mode);
5738 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5740 if (vec_cmp_insn == -1)
5742 bool swap_operands = false;
5743 bool try_again = false;
5748 swap_operands = true;
5753 swap_operands = true;
5757 /* Treat A != B as ~(A==B). */
5759 enum insn_code nor_code;
5760 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5761 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5762 gcc_assert (nor_code != CODE_FOR_nothing);
5763 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
5764 if (dmode != dest_mode)
5766 rtx temp = gen_reg_rtx (dest_mode);
5767 convert_move (temp, mask, 0);
5777 /* Try GT/GTU/LT/LTU OR EQ */
5780 enum insn_code ior_code;
5781 enum rtx_code new_code;
5785 case GE: new_code = GT; break;
5786 case GEU: new_code = GTU; break;
5787 case LE: new_code = LT; break;
5788 case LEU: new_code = LTU; break;
5793 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
5794 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5796 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5797 gcc_assert (ior_code != CODE_FOR_nothing);
5798 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
5799 if (dmode != dest_mode)
5801 rtx temp = gen_reg_rtx (dest_mode);
5802 convert_move (temp, mask, 0);
5812 /* You only get two chances. */
5814 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5816 gcc_assert (vec_cmp_insn != -1);
5827 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
5828 if (dmode != dest_mode)
5830 rtx temp = gen_reg_rtx (dest_mode);
5831 convert_move (temp, mask, 0);
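/* Editor's note: the fallback paths compose the missing comparisons
   from the native eq/gt/logical-gt patterns, illustratively

     a != b   becomes   ~(a == b)
     a >= b   becomes   (a > b) | (a == b)
     a <  b   becomes   b > a            (operands swapped)

   with each piece emitted through a recursive call. */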
5838 /* Emit a vector conditional expression.
5839 DEST is the destination. OP1 and OP2 are the two VEC_COND_EXPR operands.
5840 CC_OP0 and CC_OP1 are the two operands of the relational operation COND. */
5843 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5844 rtx cond, rtx cc_op0, rtx cc_op1)
5846 enum machine_mode dest_mode = GET_MODE (dest);
5847 enum rtx_code rcode = GET_CODE (cond);
5850 /* Get the vector mask for the given relational operations. */
5851 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
5853 emit_insn (gen_selb (dest, op2, op1, mask));
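/* Editor's note: selb picks bits from OP1 where the mask bits are 1 and
   from OP2 where they are 0, so with the all-ones/all-zeros element
   masks produced by the compare, DEST[i] = cond[i] ? OP1[i] : OP2[i],
   exactly the VEC_COND_EXPR semantics. */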
5859 spu_force_reg (enum machine_mode mode, rtx op)
5862 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
5864 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
5865 || GET_MODE (op) == BLKmode)
5866 return force_reg (mode, convert_to_mode (mode, op, 0));
5870 r = force_reg (GET_MODE (op), op);
5871 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
5873 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
5878 x = gen_reg_rtx (mode);
5879 emit_insn (gen_spu_convert (x, r));
5884 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
5886 HOST_WIDE_INT v = 0;
5888 /* Check the range of immediate operands. */
5889 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
5891 int range = p - SPU_BTI_7;
5893 if (!CONSTANT_P (op))
5894 error ("%s expects an integer literal in the range [%d, %d].",
5896 spu_builtin_range[range].low, spu_builtin_range[range].high);
5898 if (GET_CODE (op) == CONST
5899 && (GET_CODE (XEXP (op, 0)) == PLUS
5900 || GET_CODE (XEXP (op, 0)) == MINUS))
5902 v = INTVAL (XEXP (XEXP (op, 0), 1));
5903 op = XEXP (XEXP (op, 0), 0);
5905 else if (GET_CODE (op) == CONST_INT)
5907 else if (GET_CODE (op) == CONST_VECTOR
5908 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
5909 v = INTVAL (CONST_VECTOR_ELT (op, 0));
5911 /* The default for v is 0, which is valid in every range. */
5912 if (v < spu_builtin_range[range].low
5913 || v > spu_builtin_range[range].high)
5914 error ("%s expects an integer literal in the range [%d, %d]. ("
5915 HOST_WIDE_INT_PRINT_DEC ")",
5917 spu_builtin_range[range].low, spu_builtin_range[range].high,
5926 /* This is only used in lqa and stqa. Even though the insns
5927 encode 16 bits of the address (all but the 2 least
5928 significant), only 14 bits are used because it is masked to
5929 be 16-byte aligned. */
5933 /* This is used for lqr and stqr. */
5940 if (GET_CODE (op) == LABEL_REF
5941 || (GET_CODE (op) == SYMBOL_REF
5942 && SYMBOL_REF_FUNCTION_P (op))
5943 || (v & ((1 << lsbits) - 1)) != 0)
5944 warning (0, "%d least significant bits of %s are ignored.", lsbits,
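/* Illustrative diagnostic (editor's sketch, hypothetical use): si_ai
   takes an SPU_BTI_S10 immediate, so a call like si_ai (v, 600) is
   rejected with the range error above because 600 lies outside
   [-512, 511], the SPU_BTI_S10 entry of spu_builtin_range. */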
5951 expand_builtin_args (struct spu_builtin_description *d, tree exp,
5952 rtx target, rtx ops[])
5954 enum insn_code icode = d->icode;
5957 /* Expand the arguments into rtl. */
5959 if (d->parm[0] != SPU_BTI_VOID)
5962 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
5964 tree arg = CALL_EXPR_ARG (exp, a);
5967 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5970 /* The insn pattern may have additional operands (SCRATCH).
5971 Return the number of actual non-SCRATCH operands. */
5972 gcc_assert (i <= insn_data[icode].n_operands);
5977 spu_expand_builtin_1 (struct spu_builtin_description *d,
5978 tree exp, rtx target)
5982 enum insn_code icode = d->icode;
5983 enum machine_mode mode, tmode;
5988 /* Set up ops[] with values from arglist. */
5989 n_operands = expand_builtin_args (d, exp, target, ops);
5991 /* Handle the target operand which must be operand 0. */
5993 if (d->parm[0] != SPU_BTI_VOID)
5996 /* We prefer the mode specified for the match_operand; otherwise
5997 use the mode from the builtin function prototype. */
5998 tmode = insn_data[d->icode].operand[0].mode;
5999 if (tmode == VOIDmode)
6000 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6002 /* Try to use target because not using it can lead to extra copies
6003 and when we are using all of the registers extra copies leads
6005 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6008 target = ops[0] = gen_reg_rtx (tmode);
6010 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6016 if (d->fcode == SPU_MASK_FOR_LOAD)
6018 enum machine_mode mode = insn_data[icode].operand[1].mode;
6023 arg = CALL_EXPR_ARG (exp, 0);
6024 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
6025 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6026 addr = memory_address (mode, op);
6029 op = gen_reg_rtx (GET_MODE (addr));
6030 emit_insn (gen_rtx_SET (VOIDmode, op,
6031 gen_rtx_NEG (GET_MODE (addr), addr)));
6032 op = gen_rtx_MEM (mode, op);
6034 pat = GEN_FCN (icode) (target, op);
6041 /* Ignore align_hint, but still expand its args in case they have
6043 if (icode == CODE_FOR_spu_align_hint)
6046 /* Handle the rest of the operands. */
6047 for (p = 1; i < n_operands; i++, p++)
6049 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6050 mode = insn_data[d->icode].operand[i].mode;
6052 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6054 /* MODE can be VOIDmode here for labels. */
6056 /* For specific intrinsics with an immediate operand, e.g.,
6057 si_ai(), we sometimes need to convert the scalar argument to a
6058 vector argument by splatting the scalar. */
6059 if (VECTOR_MODE_P (mode)
6060 && (GET_CODE (ops[i]) == CONST_INT
6061 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6062 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6064 if (GET_CODE (ops[i]) == CONST_INT)
6065 ops[i] = spu_const (mode, INTVAL (ops[i]));
6068 rtx reg = gen_reg_rtx (mode);
6069 enum machine_mode imode = GET_MODE_INNER (mode);
6070 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6071 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6072 if (imode != GET_MODE (ops[i]))
6073 ops[i] = convert_to_mode (imode, ops[i],
6074 TYPE_UNSIGNED (spu_builtin_types
6076 emit_insn (gen_spu_splats (reg, ops[i]));
6081 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6083 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6084 ops[i] = spu_force_reg (mode, ops[i]);
6090 pat = GEN_FCN (icode) (0);
6093 pat = GEN_FCN (icode) (ops[0]);
6096 pat = GEN_FCN (icode) (ops[0], ops[1]);
6099 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6102 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6105 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6108 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6117 if (d->type == B_CALL || d->type == B_BISLED)
6118 emit_call_insn (pat);
6119 else if (d->type == B_JUMP)
6121 emit_jump_insn (pat);
6127 return_type = spu_builtin_types[d->parm[0]];
6128 if (d->parm[0] != SPU_BTI_VOID
6129 && GET_MODE (target) != TYPE_MODE (return_type))
6131 /* target is the return value. It should always have the mode of
6132 the builtin function prototype. */
6133 target = spu_force_reg (TYPE_MODE (return_type), target);
6140 spu_expand_builtin (tree exp,
6142 rtx subtarget ATTRIBUTE_UNUSED,
6143 enum machine_mode mode ATTRIBUTE_UNUSED,
6144 int ignore ATTRIBUTE_UNUSED)
6146 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6147 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6148 struct spu_builtin_description *d;
6150 if (fcode < NUM_SPU_BUILTINS)
6152 d = &spu_builtins[fcode];
6154 return spu_expand_builtin_1 (d, exp, target);
6159 /* Implement targetm.vectorize.builtin_mul_widen_even. */
6161 spu_builtin_mul_widen_even (tree type)
6163 switch (TYPE_MODE (type))
6166 if (TYPE_UNSIGNED (type))
6167 return spu_builtins[SPU_MULE_0].fndecl;
6169 return spu_builtins[SPU_MULE_1].fndecl;
6176 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
6178 spu_builtin_mul_widen_odd (tree type)
6180 switch (TYPE_MODE (type))
6183 if (TYPE_UNSIGNED (type))
6184 return spu_builtins[SPU_MULO_1].fndecl;
6186 return spu_builtins[SPU_MULO_0].fndecl;
6193 /* Implement targetm.vectorize.builtin_mask_for_load. */
6195 spu_builtin_mask_for_load (void)
6197 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
6202 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6204 spu_builtin_vectorization_cost (bool runtime_test)
6206 /* If the branch of the runtime test is taken, i.e. the vectorized
6207 version is skipped, this incurs a misprediction cost (because the
6208 vectorized version is expected to be the fall-through). So we subtract
6209 the latency of a mispredicted branch from the costs that are incurred
6210 when the vectorized version is executed. */
6217 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6218 after applying N iterations. This routine does not determine
6219 how many iterations are required to reach the desired alignment. */
6222 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6227 /* All other types are naturally aligned. */
6231 /* Implement targetm.vectorize.builtin_vec_perm. */
6233 spu_builtin_vec_perm (tree type, tree *mask_element_type)
6235 struct spu_builtin_description *d;
6237 *mask_element_type = unsigned_char_type_node;
6239 switch (TYPE_MODE (type))
6242 if (TYPE_UNSIGNED (type))
6243 d = &spu_builtins[SPU_SHUFFLE_0];
6245 d = &spu_builtins[SPU_SHUFFLE_1];
6249 if (TYPE_UNSIGNED (type))
6250 d = &spu_builtins[SPU_SHUFFLE_2];
6252 d = &spu_builtins[SPU_SHUFFLE_3];
6256 if (TYPE_UNSIGNED (type))
6257 d = &spu_builtins[SPU_SHUFFLE_4];
6259 d = &spu_builtins[SPU_SHUFFLE_5];
6263 if (TYPE_UNSIGNED (type))
6264 d = &spu_builtins[SPU_SHUFFLE_6];
6266 d = &spu_builtins[SPU_SHUFFLE_7];
6270 d = &spu_builtins[SPU_SHUFFLE_8];
6274 d = &spu_builtins[SPU_SHUFFLE_9];
6285 /* Count the total number of instructions in each pipe and return the
6286 maximum, which is used as the Minimum Iteration Interval (MII)
6287 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1;
6288 -2 marks instructions that can go in either pipe0 or pipe1. */
6290 spu_sms_res_mii (struct ddg *g)
6293 unsigned t[4] = {0, 0, 0, 0};
6295 for (i = 0; i < g->num_nodes; i++)
6297 rtx insn = g->nodes[i].insn;
6298 int p = get_pipe (insn) + 2;
6304 if (dump_file && INSN_P (insn))
6305 fprintf (dump_file, "i%d %s %d %d\n",
6307 insn_data[INSN_CODE(insn)].name,
6311 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6313 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
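/* Worked example (editor's note): with t == { 2, 0, 3, 4 }, i.e. two
   either-pipe insns, three pipe0 and four pipe1, the result is
   MAX ((2 + 3 + 4 + 1) / 2, MAX (3, 4)) == MAX (5, 4) == 5. */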
6318 spu_init_expanders (void)
6320 /* HARD_FRAME_POINTER_REGNUM is only 128-bit aligned when
6321 frame_pointer_needed is true. We don't know that until we're
6322 expanding the prologue. */
6324 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6327 static enum machine_mode
6328 spu_libgcc_cmp_return_mode (void)
6331 /* For the SPU, word_mode is TImode, so it is better to use SImode
6332 for compare returns. */
6336 static enum machine_mode
6337 spu_libgcc_shift_count_mode (void)
6339 /* For the SPU, word_mode is TImode, so it is better to use SImode
6340 for shift counts. */
6344 /* An early place to adjust some flags after GCC has finished processing
6347 asm_file_start (void)
6349 /* Variable tracking should be run after all optimizations which
6350 change the order of insns. It also needs a valid CFG. */
6351 spu_flag_var_tracking = flag_var_tracking;
6352 flag_var_tracking = 0;
6354 default_file_start ();
6357 /* Implement targetm.section_type_flags. */
6359 spu_section_type_flags (tree decl, const char *name, int reloc)
6361 /* .toe needs to have type @nobits. */
6362 if (strcmp (name, ".toe") == 0)
6364 return default_section_type_flags (decl, name, reloc);