+2002-04-29 Vladimir Makarov <vmakarov@redhat.com>
+
+ Merging code from dfa-branch:
+
+ 2002-04-24 Vladimir Makarov <vmakarov@redhat.com>
+
+ * genautomata.c (output_reserv_sets): Fix typo.
+
+ 2002-04-23 Vladimir Makarov <vmakarov@redhat.com>
+
+ * genautomata.c (output_reserv_sets): Remove
+ next_cycle_output_flag.
+
+ Thu Apr 18 08:57:06 2002 Jeffrey A Law (law@redhat.com)
+
+ * sched-rgn.c (init_ready_list): Make the DFA code handle
+ USE/CLOBBER insns in the same way as the traditional
+ scheduler.
+ (new_ready): Similarly.
+
+ 2002-04-17 Vladimir Makarov <vmakarov@redhat.com>
+
+ * haifa-sched.c (schedule_block): Change the DFA state only after
+ issuing insn.
+
+ Wed Apr 17 15:38:36 2002 Jeffrey A Law (law@redhat.com)
+
+ * pa.c (hppa_use_dfa_pipeline_interface): New function.
+ (TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE): Define.
+ (override_options): Add PA7300 scheduling support.
+ (pa_adjust_cost): Update various comments. Properly
+ handle anti and output dependencies when using the
+ DFA scheduler.
+ (pa_issue_rate): Add PA7300 scheduling support.
+ (pa_can_combine_p): Call extract_insn before calling
+ constrain_operands (taken from mainline tree).
+ * pa.h (enum processor_type): Add PROCESSOR_7300.
+ * pa.md (cpu attr): Add 7300. Rewrite pipeline
+ descriptions using DFA descriptions. Add PA7300
+ scheduling support.
+
+ 2002-03-30 David S. Miller <davem@redhat.com>
+
+ Add UltraSPARC-III DFA scheduling support.
+ * config/sparc/sparc.md (define_attr type): Add fpcrmove.
+ Update FP conditional move on register insn patterns to use it, as
+ appropriate.
+ (define_attr cpu): Add ultrasparc3.
+ (define_attr us3load_type): New, update integer load patterns to
+ set it, as appropriate.
+ (define_automaton): Add ultrasparc3_0 and ultrasparc3_1.
+ (rest): Add UltraSPARC3 scheduling description.
+ * config/sparc/sparc.h (TARGET_CPU_ultrasparc3): New.
+ (PROCESSOR_ULTRASPARC3): New.
+ ({ASM,CPP}_CPU64_DEFAULT_SPEC): Handle ultrasparc3.
+ ({ASM,CPP}_CPU_SPEC): Likewise.
+ (REGISTER_MOVE_COST): Likewise.
+ (RTX_COSTS): Likewise.
+ * config/sparc/sparc.c (sparc_override_options,
+ sparc_initialize_trampoline, sparc64_initialize_trampoline,
+ sparc_use_dfa_pipeline_interface, sparc_use_sched_lookahead,
+ sparc_issue_rate): Likewise.
+ * config/sparc/sol2.h: Likewise.
+ * config/sparc/sol2-sld-64.h: Likewise.
+ * config/sparc/linux64.h: Likewise.
+
+ 2002-03-22 Vladimir Makarov <vmakarov@redhat.com>
+
+ * doc/md.texi: Add comments about usage of the latency time for the
+ different dependencies and about the case when two or more conditions
+ in different define_insn_reservations return TRUE for an insn.
+
+ * doc/md.texi: Add reference for automaton based pipeline
+ description.
+
+ 2002-03-04 Vladimir Makarov <vmakarov@redhat.com>
+
+ * doc/passes.texi: Add missed information about genattrtab.
+
+ 2002-03-01 Vladimir Makarov <vmakarov@redhat.com>
+
+ * genautomata.c (output_automata_list_transition_code): Check
+ automata_list on NULL.
+
+ 2002-02-28 Vladimir Makarov <vmakarov@redhat.com>
+
+ * genautomata.c (output_insn_code_cases,
+ output_automata_list_min_issue_delay_code,
+ output_automata_list_transition_code,
+ output_automata_list_state_alts_code): Comment the functions.
+
+ 2002-02-22 Vladimir Makarov <vmakarov@redhat.com>
+
+ * genautomata.c (automata_list_el_t): New typedef.
+ (get_free_automata_list_el,free_automata_list_el,
+ free_automata_list, automata_list_hash, automata_list_eq_p,
+ initiate_automata_lists, automata_list_start, automata_list_add,
+ automata_list_finish, finish_automata_lists,
+ output_insn_code_cases, output_automata_list_min_issue_delay_code,
+ output_automata_list_transition_code,
+ output_automata_list_state_alts_code, add_automaton_state,
+ form_important_insn_automata_lists): New functions and prototypes.
+ (insn_reserv_decl): Add members important_automata_list and
+ processed_p.
+ (ainsn): Add member important_p.
+ (automata_list_el): New structure.
+ (first_free_automata_list_el, current_automata_list,
+ automata_list_table): New global variables.
+ (create_ainsns): Initiate member important_p.
+ (output_internal_min_issue_delay_func): Generate the switch and
+ call output_insn_code_cases.
+ (output_internal_trans_func, output_internal_state_alts_func):
+ Ditto.
+ (generate): Call initiate_automata_lists.
+ (automaton_states): New global variable.
+ (expand_automata): Call form_important_insn_automata_lists.
+ (write_automata): Call finish_automata_lists.
+
+ 2002-02-21 Vladimir Makarov <vmakarov@redhat.com>
+
+ * genautomata.c (add_excls, add_presence_absence): Check that
+ cpu units in the sets belong to the same automaton.
+
+ * rtl.def (EXCLUSION_SET, PRESENCE_SET, ABSENCE_SET): Add comment
+ that cpu units in the sets belong to the same automaton.
+
+ * doc/md.texi: Ditto.
+
+ 2001-12-20 Naveen Sharma,Nitin Gupta <naveens@noida.hcltech.com,niting@noida.hcltech.com>
+
+ * config/sh/sh.c (sh_use_dfa_interface): New function.
+
+ (sh_issue_rate): New Function.
+ TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE: define.
+ TARGET_SCHED_ISSUE_RATE: define.
+
+ * config/sh/sh.md: Add DFA based pipeline description for SH4.
+
+ (define_attr insn_class): New attribute used for DFA
+ scheduling.
+ (define_insn cmpgtsi_t): Set attribute insn_class mt_group.
+ (cmpgesi_t,cmpgtusi_t,cmpgeusi_t,cmpeqsi_t,
+ cmpeqdi_t): Likewise.
+
+ (add,addc1,addsi3,subc,subc1,*subsi3_internal,
+ negc,negsi2,ashldi3_k,lshrdi3_k,ashrdi3_k): Set insn_class
+ ex_group.
+ (iorsi3,rotlsi3_1,rotlsi3_31,rotlsi3_16): Likewise.
+
+ 2001-10-03 Vladimir Makarov <vmakarov@toke.toronto.redhat.com>
+
+ * haifa-sched.c (queue_to_ready): Remove unnecessary condition for
+ break.
+
+ 2001-10-03 Vladimir Makarov <vmakarov@toke.toronto.redhat.com>
+
+ * genautomata.c (DFA_INSN_CODES_LENGTH_VARIABLE_NAME): New macro.
+ (output_dfa_insn_code_func): Expand dfa_insn_codes if it is
+ necessary.
+ (output_dfa_start_func): Initiate new variable insn_codes_length.
+ (write_automata): Output definition of the new variable.
+
+ 2001-10-02 David S. Miller <davem@redhat.com>
+
+ * haifa-sched.c (advance_one_cycle): New function.
+ (schedule_block): Use it.
+ (queue_to_ready): Use it, and also make sure to advance the DFA
+ state on all stall cycles, not just those where insn_queue links
+ are found.
+
+ 2001-10-02 Richard Sandiford <rsandifo@redhat.com>
+
+ * haifa-sched.c (max_issue): Remove last_p argument. Only return
+ non-zero if the highest-priority instruction could be scheduled.
+ (choose_ready): Remove last argument from max_issue call.
+
+ 2001-09-28 David S. Miller <davem@redhat.com>
+
+ * config/sparc/sparc.c (sparc_use_sched_lookahead): Use 4 for
+ ultrasparc and 3 for other multi-issue sparcs.
+
+ 2001-09-27 David S. Miller <davem@redhat.com>
+
+ * config/sparc/sparc.md (cycle_display): New pattern.
+ * config/sparc/sparc.c (sparc_cycle_display): New.
+ (TARGET_SCHED_CYCLE_DISPLAY): Set it.
+
+ 2001-09-25 David S. Miller <davem@redhat.com>
+
+ Convert all of Sparc scheduling to DFA
+ * config/sparc/sparc.md: Kill all define_function_unit
+ directives and replace with DFA equivalent.
+ * config/sparc/sparc.c (ultrasparc_adjust_cost,
+ mark_ultrasparc_pipeline_state, ultra_cmove_results_ready_p,
+ ultra_fpmode_conflict_exists, ultra_find_type,
+ ultra_build_types_avail, ultra_flush_pipeline,
+ ultra_rescan_pipeline_state, ultrasparc_sched_reorder,
+ ultrasparc_variable_issue, ultrasparc_sched_init,
+ sparc_variable_issue, sparc_sched_reorder, ultra_code_from_mask,
+ ultra_schedule_insn, ultra_code_names, ultra_pipe_hist,
+ ultra_cur_hist, ultra_cycles_elapsed): Kill.
+ (sparc_use_dfa_pipeline_interface, sparc_use_sched_lookahead,
+ ultrasparc_store_bypass_p): New.
+ * config/sparc/sparc-protos.h (ultrasparc_store_bypass_p):
+ Declare.
+
+ 2001-09-24 David S. Miller <davem@redhat.com>
+
+ * haifa-sched.c (ready_remove): Fix thinko, we want to copy around
+ ready->vec[foo] not ready[foo].
+
+ 2001-09-07 Vladimir Makarov <vmakarov@redhat.com>
+
+ * doc/md.texi: Correct examples for define_insn_reservations
+ `mult' and `div'.
+
+ 2001-09-07 Vladimir Makarov <vmakarov@redhat.com>
+
+ * genautomata.c (create_automata): Print message about creation of
+ each automaton.
+ (generate): Remove printing message about creation of
+ automata.
+
+ 2001-09-05 David S. Miller <davem@redhat.com>
+
+ * config/sparc/linux.h: Set CPLUSPLUS_CPP_SPEC.
+ * config/sparc/linux64.h: Likewise.
+
+ 2001-08-31 Vladimir Makarov <vmakarov@redhat.com>
+
+ * haifa-sched.c (insn_cost, schedule_insn, queue_to_ready,
+ schedule_block, sched_init, sched_finish): Add missed calls of
+ use_dfa_pipeline_interface.
+
+ * sched-rgn.c (init_ready_list, new_ready, debug_dependencies):
+ Ditto.
+
+ * sched-vis.c (get_visual_tbl_length): Ditto.
+
+ 2001-08-27 Richard Henderson <rth@redhat.com>
+
+ * genattr.c (main): Emit state_t even when not doing scheduling.
+
+ 2001-08-27 Richard Henderson <rth@redhat.com>
+
+ * genautomata.c (expand_automata): Always create a description.
+
+ 2001-08-27 Vladimir Makarov <vmakarov@touchme.toronto.redhat.com>
+
+ * rtl.def (DEFINE_CPU_UNIT, DEFINE_QUERY_CPU_UNIT, EXCLUSION_SET,
+ PRESENCE_SET, ABSENCE_SET, DEFINE_BYPASS, DEFINE_AUTOMATON,
+ AUTOMATA_OPTION, DEFINE_RESERVATION, DEFINE_INSN_RESERVATION): New
+ RTL constructions.
+
+ * genattr.c (main): New variable num_insn_reservations. Increase
+ it if there is DEFINE_INSN_RESERVATION. Output automaton based
+ pipeline hazard recognizer interface.
+
+ * genattrtab.h: New file.
+
+ * genattrtab.c: Include genattrtab.h.
+ (attr_printf, check_attr_test, make_internal_attr,
+ make_numeric_value): Move prototypes into genattrtab.h. Define them
+ as external.
+ (num_dfa_decls): New global variable.
+ (main): Process DEFINE_CPU_UNIT, DEFINE_QUERY_CPU_UNIT,
+ DEFINE_BYPASS, EXCLUSION_SET, PRESENCE_SET, ABSENCE_SET,
+ DEFINE_AUTOMATON, AUTOMATA_OPTION, DEFINE_RESERVATION,
+ DEFINE_INSN_RESERVATION. Call expand_automata and write_automata.
+
+ * genautomata.c: New file.
+
+ * rtl.h (LINK_COST_ZERO, LINK_COST_FREE): Remove them.
+
+ * sched-int.h: (curr_state): Add the external definition for
+ automaton pipeline interface.
+ (haifa_insn_data): Add comments for members blockage and units.
+
+ * target-def.h (TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE,
+ TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN,
+ TARGET_SCHED_DFA_PRE_CYCLE_INSN,
+ TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN,
+ TARGET_SCHED_DFA_POST_CYCLE_INSN,
+ TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD,
+ TARGET_SCHED_INIT_DFA_BUBBLES, TARGET_SCHED_DFA_BUBBLE): New
+ macros.
+ (TARGET_SCHED): Use the new macros.
+
+ * target.h (use_dfa_pipeline_interface, init_dfa_pre_cycle_insn,
+ dfa_pre_cycle_insn, init_dfa_post_cycle_insn, dfa_post_cycle_insn,
+ first_cycle_multipass_dfa_lookahead, init_dfa_bubbles,
+ dfa_bubble): New members in gcc_target.sched.
+
+ * haifa-sched.c (insert_schedule_bubbles_p): New variable.
+ (MAX_INSN_QUEUE_INDEX): New macro for automaton interface.
+ (insn_queue): Redefine it as pointer to array.
+ (NEXT_Q, NEXT_Q_AFTER): Use MAX_INSN_QUEUE_INDEX instead of
+ INSN_QUEUE_SIZE.
+ (max_insn_queue_index_macro_value): New variable.
+ (curr_state, dfa_state_size, ready_try): New variables for
+ automaton interface.
+ (ready_element, ready_remove, max_issue): New function prototypes
+ for automaton interface.
+ (choose_ready): New function prototype.
+ (insn_unit, blockage_range): Add comments.
+ (unit_last_insn, unit_tick, unit_n_insns): Define them for case
+ FUNCTION_UNITS_SIZE == 0.
+ (insn_issue_delay, actual_hazard_this_instance, schedule_unit,
+ actual_hazard, potential_hazard): Add comments.
+ (insn_cost): Use cost -1 as undefined value. Remove
+ LINK_COST_ZERO and LINK_COST_FREE. Add new code for automaton
+ pipeline interface.
+ (ready_element, ready_remove): New functions for automaton
+ interface.
+ (schedule_insn): Add new code for automaton pipeline interface.
+ (queue_to_ready): Add new code for automaton pipeline interface.
+ Use MAX_INSN_QUEUE_INDEX instead of INSN_QUEUE_SIZE.
+ (debug_ready_list): Print newline when the queue is empty.
+ (max_issue): New function for automaton pipeline interface.
+ (choose_ready): New function.
+ (schedule_block): Add new code for automaton pipeline interface.
+ Print ready list before scheduling each insn.
+ (sched_init): Add new code for automaton pipeline interface.
+ Initiate insn cost by -1.
+ (sched_finish): Free the current automaton state and finalize
+ automaton pipeline interface.
+
+ * sched-rgn.c: Include target.h.
+ (init_ready_list, new_ready, debug_dependencies): Add new code for
+ automaton pipeline interface.
+
+ * sched-vis.c: Include target.h.
+ (get_visual_tbl_length): Add code for automaton interface.
+ (target_units, print_block_visualization): Add comments.
+
+ * Makefile.in (GETRUNTIME, HASHTAB, HOST_GETRUNTIME, HOST_HASHTAB,
+ USE_HOST_GETRUNTIME, USE_HOST_HASHTAB, HOST_VARRAY): New variables.
+ (sched-rgn.o, sched-vis.o): Add new dependency file target.h.
+ (getruntime.o, genautomata.o): New entries.
+ (genattrtab.o): Add new dependency file genattrtab.h.
+ (genattrtab): Add new dependencies. Link it with `libm.a'.
+ (getruntime.o, hashtab.o): New entries for canadian cross.
+
+ * doc/md.texi: Description of automaton based model.
+
+ * doc/tm.texi (TARGET_SCHED_ISSUE_RATE, TARGET_SCHED_ADJUST_COST):
+ Add comments.
+ (TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE,
+ TARGET_SCHED_DFA_PRE_CYCLE_INSN,
+ TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN,
+ TARGET_SCHED_DFA_POST_CYCLE_INSN,
+ TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN,
+ TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD,
+ TARGET_SCHED_INIT_DFA_BUBBLES, TARGET_SCHED_DFA_BUBBLE): The new
+ hook descriptions.
+ (TRADITIONAL_PIPELINE_INTERFACE, DFA_PIPELINE_INTERFACE,
+ MAX_DFA_ISSUE_RATE): New macro descriptions.
+
+ * doc/contrib.texi: Add dfa based scheduler contribution.
+
+ * doc/gcc.texi: Add more information about genattrtab.
+
Mon Apr 29 17:19:10 2002 Richard Kenner <kenner@vlsi1.ultra.nyu.edu>
* reload1.c (eliminate_regs, case SUBREG): Fix typo in
# List of internationalization subdirectories.
INTL_SUBDIRS = intl
+# Change this to a null string if obstacks are installed in the
+# system library.
+OBSTACK=obstack.o
+
+# The following object files are used by genautomata.
+GETRUNTIME = getruntime.o
+HASHTAB = hashtab.o
+
# The GC method to be used on this system.
GGC=@GGC@.o
# Native linker and preprocessor flags. For x-fragment overrides.
HOST_LDFLAGS=$(LDFLAGS)
HOST_CPPFLAGS=$(ALL_CPPFLAGS)
+HOST_OBSTACK=$(OBSTACK)
+HOST_VFPRINTF=$(VFPRINTF)
+HOST_DOPRINT=$(DOPRINT)
+HOST_GETRUNTIME=$(GETRUNTIME)
+HOST_HASHTAB=$(HASHTAB)
+HOST_STRSTR=$(STRSTR)
# Actual name to use when installing a native compiler.
GCC_INSTALL_NAME = `echo gcc|sed '$(program_transform_name)'`
LIBIBERTY = ../libiberty/libiberty.a
BUILD_LIBIBERTY = @FORBUILD@/libiberty/libiberty.a
-# Dependencies on the intl and portability libraries.
-LIBDEPS= $(INTLDEPS) $(LIBIBERTY)
+USE_HOST_OBSTACK= ` case "${HOST_OBSTACK}" in ?*) echo ${HOST_PREFIX}${HOST_OBSTACK} ;; esac `
+USE_HOST_VFPRINTF= ` case "${HOST_VFPRINTF}" in ?*) echo ${HOST_PREFIX}${HOST_VFPRINTF} ;; esac `
+USE_HOST_DOPRINT= ` case "${HOST_DOPRINT}" in ?*) echo ${HOST_PREFIX}${HOST_DOPRINT} ;; esac `
+USE_HOST_GETRUNTIME= ` case "${HOST_GETRUNTIME}" in ?*) echo ${HOST_PREFIX}${HOST_GETRUNTIME} ;; esac `
+USE_HOST_HASHTAB= ` case "${HOST_HASHTAB}" in ?*) echo ${HOST_PREFIX}${HOST_HASHTAB} ;; esac `
+USE_HOST_STRSTR= ` case "${HOST_STRSTR}" in ?*) echo ${HOST_PREFIX}${HOST_STRSTR} ;; esac `
+
+# Dependency on the intl, portability libraries, obstack or whatever
+# library facilities are not installed in the system libraries.
+# We don't use USE_* because backquote expansion doesn't work in deps.
+LIBDEPS= $(INTLLIBS) $(LIBIBERTY) $(OBSTACK) $(VFPRINTF) $(DOPRINT) $(STRSTR)
# Likewise, for use in the tools that must run on this machine
# even if we are cross-building GCC.
HOST_PRINT = $(HOST_PREFIX)print-rtl.o
HOST_ERRORS = $(HOST_PREFIX)errors.o
+HOST_VARRAY = $(HOST_PREFIX)varray.o
# Specify the directories to be searched for header files.
# Both . and srcdir are used, in that order,
ggc-none.o: ggc-none.c $(GCONFIG_H) $(SYSTEM_H) $(GGC_H)
$(CC) -c $(ALL_CFLAGS) -DGENERATOR_FILE $(ALL_CPPFLAGS) $(INCLUDES) $< $(OUTPUT_OPTION)
+obstack.o: $(srcdir)/../libiberty/obstack.c $(GCONFIG_H)
+ rm -f obstack.c
+ $(LN_S) $(srcdir)/../libiberty/obstack.c obstack.c
+ $(CC) -c $(ALL_CFLAGS) -DGENERATOR_FILE $(ALL_CPPFLAGS) $(INCLUDES) \
+ obstack.c $(OUTPUT_OPTION)
+
+getruntime.o: $(srcdir)/../libiberty/getruntime.c $(CONFIG_H)
+ rm -f getruntime.c
+ $(LN_S) $(srcdir)/../libiberty/getruntime.c getruntime.c
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) getruntime.c
+
prefix.o: prefix.c $(CONFIG_H) $(SYSTEM_H) Makefile prefix.h
$(CC) $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
-DPREFIX=\"$(prefix)\" \
$(INSN_ATTR_H) toplev.h $(RECOG_H) except.h cselib.h $(PARAMS_H) $(TM_P_H)
sched-rgn.o : sched-rgn.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) sched-int.h \
$(BASIC_BLOCK_H) $(REGS_H) hard-reg-set.h flags.h insn-config.h function.h \
- $(INSN_ATTR_H) toplev.h $(RECOG_H) except.h $(TM_P_H)
+ $(INSN_ATTR_H) toplev.h $(RECOG_H) except.h $(TM_P_H) $(TARGET_H)
sched-ebb.o : sched-ebb.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) sched-int.h \
$(BASIC_BLOCK_H) $(REGS_H) hard-reg-set.h flags.h insn-config.h function.h \
$(INSN_ATTR_H) toplev.h $(RECOG_H) except.h $(TM_P_H)
sched-vis.o : sched-vis.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) sched-int.h \
- hard-reg-set.h $(BASIC_BLOCK_H) $(INSN_ATTR_H) $(REGS_H) $(TM_P_H)
+ hard-reg-set.h $(BASIC_BLOCK_H) $(INSN_ATTR_H) $(REGS_H) $(TM_P_H) \
+ $(TARGET_H)
final.o : final.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) flags.h intl.h \
$(REGS_H) $(RECOG_H) conditions.h insn-config.h $(INSN_ATTR_H) function.h \
real.h output.h hard-reg-set.h except.h debug.h xcoffout.h \
genattr.o : genattr.c $(RTL_H) $(HCONFIG_H) $(SYSTEM_H) errors.h gensupport.h
$(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(srcdir)/genattr.c $(OUTPUT_OPTION)
-genattrtab$(build_exeext) : genattrtab.o $(HOST_RTL) $(HOST_PRINT) $(HOST_ERRORS) $(HOST_LIBDEPS)
+genattrtab$(build_exeext) : genattrtab.o genautomata.o $(HOST_RTL) $(HOST_PRINT) $(HOST_ERRORS) $(HOST_VARRAY) $(HOST_PREFIX)$(HOST_GETRUNTIME) $(HOST_LIBDEPS)
$(HOST_CC) $(HOST_CFLAGS) $(HOST_LDFLAGS) -o $@ \
- genattrtab.o $(HOST_RTL) $(HOST_PRINT) $(HOST_ERRORS) $(HOST_LIBS)
+ genattrtab.o genautomata.o $(HOST_RTL) $(HOST_PRINT) $(HOST_ERRORS) $(HOST_VARRAY) $(USE_HOST_GETRUNTIME) $(HOST_LIBS) -lm
genattrtab.o : genattrtab.c $(RTL_H) $(OBSTACK_H) $(HCONFIG_H) \
- $(SYSTEM_H) errors.h $(GGC_H) gensupport.h
+ $(SYSTEM_H) errors.h $(GGC_H) gensupport.h genattrtab.h
$(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(srcdir)/genattrtab.c $(OUTPUT_OPTION)
+genautomata.o : genautomata.c $(RTL_H) $(OBSTACK_H) $(HCONFIG_H) \
+ $(SYSTEM_H) errors.h varray.h hash.h genattrtab.h
+ $(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(srcdir)/genautomata.c $(OUTPUT_OPTION)
+
genoutput$(build_exeext) : genoutput.o $(HOST_RTL) $(HOST_PRINT) $(HOST_ERRORS) $(HOST_LIBDEPS)
$(HOST_CC) $(HOST_CFLAGS) $(HOST_LDFLAGS) -o $@ \
genoutput.o $(HOST_RTL) $(HOST_PRINT) $(HOST_ERRORS) $(HOST_LIBS)
sed -e 's/config[.]h/hconfig.h/' $(srcdir)/bitmap.c > $(HOST_PREFIX)bitmap.c
$(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(HOST_PREFIX)bitmap.c $(OUTPUT_OPTION)
+$(HOST_PREFIX_1)obstack.o: $(srcdir)/../libiberty/obstack.c $(HCONFIG_H)
+ rm -f $(HOST_PREFIX)obstack.c
+ sed -e 's/config[.]h/hconfig.h/' $(srcdir)/../libiberty/obstack.c > $(HOST_PREFIX)obstack.c
+ $(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(HOST_PREFIX)obstack.c $(OUTPUT_OPTION)
+
+$(HOST_PREFIX_1)getruntime.o: $(srcdir)/../libiberty/getruntime.c
+ rm -f $(HOST_PREFIX)getruntime.c
+ sed -e 's/config[.]h/hconfig.h/' $(srcdir)/../libiberty/getruntime.c > $(HOST_PREFIX)getruntime.c
+ $(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(HOST_PREFIX)getruntime.c $(OUTPUT_OPTION)
+
+$(HOST_PREFIX_1)hashtab.o: $(srcdir)/../libiberty/hashtab.c
+ rm -f $(HOST_PREFIX)hashtab.c
+ sed -e 's/config[.]h/hconfig.h/' $(srcdir)/../libiberty/hashtab.c > $(HOST_PREFIX)hashtab.c
+ $(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(HOST_PREFIX)hashtab.c $(OUTPUT_OPTION)
+
+$(HOST_PREFIX_1)vfprintf.o: $(srcdir)/../libiberty/vfprintf.c $(HCONFIG_H)
+ rm -f $(HOST_PREFIX)vfprintf.c
+ sed -e 's/config[.]h/hconfig.h/' $(srcdir)/../libiberty/vfprintf.c > $(HOST_PREFIX)vfprintf.c
+ $(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(HOST_PREFIX)vfprintf.c $(OUTPUT_OPTION)
+
+$(HOST_PREFIX_1)doprint.o: doprint.c $(HCONFIG_H)
+ rm -f $(HOST_PREFIX)doprint.c
+ sed -e 's/config[.]h/hconfig.h/' $(srcdir)/doprint.c > $(HOST_PREFIX)doprint.c
+ $(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(HOST_PREFIX)doprint.c $(OUTPUT_OPTION)
+
+$(HOST_PREFIX_1)strstr.o: $(srcdir)/../libiberty/strstr.c $(HCONFIG_H)
+ rm -f $(HOST_PREFIX)strstr.c
+ sed -e 's/config[.]h/hconfig.h/' $(srcdir)/../libiberty/strstr.c > $(HOST_PREFIX)strstr.c
+ $(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(HOST_PREFIX)strstr.c $(OUTPUT_OPTION)
+
$(HOST_PREFIX_1)errors.o: errors.c $(HCONFIG_H) $(SYSTEM_H) errors.h
rm -f $(HOST_PREFIX)errors.c
sed -e 's/config[.]h/hconfig.h/' $(srcdir)/errors.c > $(HOST_PREFIX)errors.c
$(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(HOST_PREFIX)errors.c $(OUTPUT_OPTION)
+
+# This satisfies the dependency that we get if you cross-compile a compiler
+# that does not need to compile doprint or whatever.
+$(HOST_PREFIX_1):
+ $(STAMP) $(HOST_PREFIX_1)
+
$(HOST_PREFIX_1)ggc-none.o: ggc-none.c $(HCONFIG_H) $(SYSTEM_H) $(GCC_H)
rm -f $(HOST_PREFIX)ggc-none.c
sed -e 's/config[.]h/hconfig.h/' $(srcdir)/ggc-none.c > $(HOST_PREFIX)ggc-none.c
#include "target.h"
#include "target-def.h"
+static int hppa_use_dfa_pipeline_interface PARAMS ((void));
+
+#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
+#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hppa_use_dfa_pipeline_interface
+
+static int
+hppa_use_dfa_pipeline_interface ()
+{
+ return 1;
+}
+
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
pa_cpu_string = "7200";
pa_cpu = PROCESSOR_7200;
}
+ else if (pa_cpu_string && ! strcmp (pa_cpu_string, "7300"))
+ {
+ pa_cpu_string = "7300";
+ pa_cpu = PROCESSOR_7300;
+ }
else if (pa_cpu_string && ! strcmp (pa_cpu_string, "8000"))
{
pa_cpu_string = "8000";
}
else
{
- warning ("unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, and 8000\n", pa_cpu_string);
+ warning ("unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, 7300, and 8000\n", pa_cpu_string);
}
/* Set the instruction set architecture. */
{
case TYPE_FPLOAD:
/* This cost 3 cycles, not 2 as the md says for the
- 700 and 7100. */
+ 700 and 7100, 7100lc, 7200 and 7300. */
return cost + 1;
case TYPE_FPALU:
return cost;
}
}
+
+ /* A flop-flop true dependency where the sizes of the operand
+ carrying the dependency differ causes an additional
+ cycle stall on the 7100lc, 7200, and 7300. Similarly for
+ a fpload-flop true dependency. */
}
/* For other data dependencies, the default cost specified in the
preceding arithmetic operation has finished if
the target of the fpload is any of the sources
(or destination) of the arithmetic operation. */
- return cost - 1;
+ if (hppa_use_dfa_pipeline_interface ())
+ return insn_default_latency (dep_insn) - 1;
+ else
+ return cost - 1;
default:
return 0;
preceding divide or sqrt operation has finished if
the target of the ALU flop is any of the sources
(or destination) of the divide or sqrt operation. */
- return cost - 2;
+ if (hppa_use_dfa_pipeline_interface ())
+ return insn_default_latency (dep_insn) - 2;
+ else
+ return cost - 2;
default:
return 0;
/* A fpload can't be issued until one cycle before a
preceding arithmetic operation has finished if
the target of the fpload is the destination of the
- arithmetic operation. */
- return cost - 1;
+ arithmetic operation.
+
+ Exception: For PA7100LC, PA7200 and PA7300, the cost
+ is 3 cycles, unless they bundle together. We also
+ pay the penalty if the second insn is a fpload. */
+ if (hppa_use_dfa_pipeline_interface ())
+ return insn_default_latency (dep_insn) - 1;
+ else
+ return cost - 1;
default:
return 0;
preceding divide or sqrt operation has finished if
the target of the ALU flop is also the target of
the divide or sqrt operation. */
- return cost - 2;
+ if (hppa_use_dfa_pipeline_interface ())
+ return insn_default_latency (dep_insn) - 2;
+ else
+ return cost - 2;
default:
return 0;
case PROCESSOR_7100: return 2;
case PROCESSOR_7100LC: return 2;
case PROCESSOR_7200: return 2;
+ case PROCESSOR_7300: return 2;
case PROCESSOR_8000: return 4;
default:
PROCESSOR_7100,
PROCESSOR_7100LC,
PROCESSOR_7200,
+ PROCESSOR_7300,
PROCESSOR_8000
};
;;
;; FIXME: Add 800 scheduling for completeness?
-(define_attr "cpu" "700,7100,7100LC,7200,8000" (const (symbol_ref "pa_cpu_attr")))
+(define_attr "cpu" "700,7100,7100LC,7200,7300,8000" (const (symbol_ref "pa_cpu_attr")))
;; Length (in # of bytes).
(define_attr "length" ""
(const_int 0)))
[(eq_attr "in_branch_delay" "true") (nil) (nil)])
-;; Function units of the HPPA. The following data is for the 700 CPUs
-;; (Mustang CPU + Timex FPU aka PA-89) because that's what I have the docs for.
-;; Scheduling instructions for PA-83 machines according to the Snake
-;; constraints shouldn't hurt.
-
-;; (define_function_unit {name} {num-units} {n-users} {test}
-;; {ready-delay} {issue-delay} [{conflict-list}])
-
-;; The integer ALU.
-;; (Noted only for documentation; units that take one cycle do not need to
-;; be specified.)
-
-;; (define_function_unit "alu" 1 0
-;; (and (eq_attr "type" "unary,shift,nullshift,binary,move,address")
-;; (eq_attr "cpu" "700"))
-;; 1 0)
-
-
;; Memory. Disregarding Cache misses, the Mustang memory times are:
;; load: 2, fpload: 3
;; store, fpstore: 3, no D-cache operations should be scheduled.
-(define_function_unit "pa700memory" 1 0
- (and (eq_attr "type" "load,fpload")
- (eq_attr "cpu" "700")) 2 0)
-(define_function_unit "pa700memory" 1 0
- (and (eq_attr "type" "store,fpstore")
- (eq_attr "cpu" "700")) 3 3)
-
;; The Timex (aka 700) has two floating-point units: ALU, and MUL/DIV/SQRT.
;; Timings:
;; Instruction Time Unit Minimum Distance (unit contention)
;; fdiv,dbl 12 MPY 12
;; fsqrt,sgl 14 MPY 14
;; fsqrt,dbl 18 MPY 18
+;;
+;; We don't model fmpyadd/fmpysub properly as those instructions
+;; keep both the FP ALU and MPY units busy. Given that these
+;; processors are obsolete, I'm not going to spend the time to
+;; model those instructions correctly.
-(define_function_unit "pa700fp_alu" 1 0
+(define_automaton "pa700")
+(define_cpu_unit "dummy_700,mem_700,fpalu_700,fpmpy_700" "pa700")
+
+(define_insn_reservation "W0" 4
(and (eq_attr "type" "fpcc")
- (eq_attr "cpu" "700")) 4 2)
-(define_function_unit "pa700fp_alu" 1 0
+ (eq_attr "cpu" "700"))
+ "fpalu_700*2")
+
+(define_insn_reservation "W1" 3
(and (eq_attr "type" "fpalu")
- (eq_attr "cpu" "700")) 3 2)
-(define_function_unit "pa700fp_mpy" 1 0
+ (eq_attr "cpu" "700"))
+ "fpalu_700*2")
+
+(define_insn_reservation "W2" 3
(and (eq_attr "type" "fpmulsgl,fpmuldbl")
- (eq_attr "cpu" "700")) 3 2)
-(define_function_unit "pa700fp_mpy" 1 0
+ (eq_attr "cpu" "700"))
+ "fpmpy_700*2")
+
+(define_insn_reservation "W3" 10
(and (eq_attr "type" "fpdivsgl")
- (eq_attr "cpu" "700")) 10 10)
-(define_function_unit "pa700fp_mpy" 1 0
+ (eq_attr "cpu" "700"))
+ "fpmpy_700*10")
+
+(define_insn_reservation "W4" 12
(and (eq_attr "type" "fpdivdbl")
- (eq_attr "cpu" "700")) 12 12)
-(define_function_unit "pa700fp_mpy" 1 0
+ (eq_attr "cpu" "700"))
+ "fpmpy_700*12")
+
+(define_insn_reservation "W5" 14
(and (eq_attr "type" "fpsqrtsgl")
- (eq_attr "cpu" "700")) 14 14)
-(define_function_unit "pa700fp_mpy" 1 0
+ (eq_attr "cpu" "700"))
+ "fpmpy_700*14")
+
+(define_insn_reservation "W6" 18
(and (eq_attr "type" "fpsqrtdbl")
- (eq_attr "cpu" "700")) 18 18)
+ (eq_attr "cpu" "700"))
+ "fpmpy_700*18")
+
+(define_insn_reservation "W7" 2
+ (and (eq_attr "type" "load,fpload")
+ (eq_attr "cpu" "700"))
+ "mem_700")
+
+(define_insn_reservation "W8" 3
+ (and (eq_attr "type" "store,fpstore")
+ (eq_attr "cpu" "700"))
+ "mem_700*3")
+
+(define_insn_reservation "W9" 1
+ (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,load,fpload,store,fpstore")
+ (eq_attr "cpu" "700"))
+ "dummy_700")
;; Function units for the 7100 and 7150. The 7100/7150 can dual-issue
;; floating point computations with non-floating point computations (fp loads
;; and stores are not fp computations).
;;
-
;; Memory. Disregarding Cache misses, memory loads take two cycles; stores also
;; take two cycles, during which no Dcache operations should be scheduled.
;; Any special cases are handled in pa_adjust_cost. The 7100, 7150 and 7100LC
;; all have the same memory characteristics if one disregards cache misses.
-(define_function_unit "pa7100memory" 1 0
- (and (eq_attr "type" "load,fpload")
- (eq_attr "cpu" "7100,7100LC")) 2 0)
-(define_function_unit "pa7100memory" 1 0
- (and (eq_attr "type" "store,fpstore")
- (eq_attr "cpu" "7100,7100LC")) 2 2)
;; The 7100/7150 has three floating-point units: ALU, MUL, and DIV.
;; Timings:
;; fdiv,dbl 15 DIV 15
;; fsqrt,sgl 8 DIV 8
;; fsqrt,dbl 15 DIV 15
+;;
+;; We don't really model the FP ALU/MPY units properly (they are
+;; distinct subunits in the FP unit). However, there can never be
+;; a functional unit conflict given the latency and issue rates
+;; for those units.
-(define_function_unit "pa7100fp_alu" 1 0
- (and (eq_attr "type" "fpcc,fpalu")
- (eq_attr "cpu" "7100")) 2 1)
-(define_function_unit "pa7100fp_mpy" 1 0
- (and (eq_attr "type" "fpmulsgl,fpmuldbl")
- (eq_attr "cpu" "7100")) 2 1)
-(define_function_unit "pa7100fp_div" 1 0
+(define_automaton "pa7100")
+(define_cpu_unit "i_7100, f_7100,fpmac_7100,fpdivsqrt_7100,mem_7100" "pa7100")
+
+(define_insn_reservation "X0" 2
+ (and (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
+ (eq_attr "cpu" "7100"))
+ "f_7100,fpmac_7100")
+
+(define_insn_reservation "X1" 8
(and (eq_attr "type" "fpdivsgl,fpsqrtsgl")
- (eq_attr "cpu" "7100")) 8 8)
-(define_function_unit "pa7100fp_div" 1 0
- (and (eq_attr "type" "fpdivdbl,fpsqrtdbl")
- (eq_attr "cpu" "7100")) 15 15)
+ (eq_attr "cpu" "7100"))
+ "f_7100+fpdivsqrt_7100,fpdivsqrt_7100*7")
-;; To encourage dual issue we define function units corresponding to
-;; the instructions which can be dual issued. This is a rather crude
-;; approximation, the "pa7100nonflop" test in particular could be refined.
-(define_function_unit "pa7100flop" 1 1
- (and
- (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
- (eq_attr "cpu" "7100")) 1 1)
+(define_insn_reservation "X2" 15
+ (and (eq_attr "type" "fpdivdbl,fpsqrtdbl")
+ (eq_attr "cpu" "7100"))
+ "f_7100+fpdivsqrt_7100,fpdivsqrt_7100*14")
-(define_function_unit "pa7100nonflop" 1 1
- (and
- (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
- (eq_attr "cpu" "7100")) 1 1)
+(define_insn_reservation "X3" 2
+ (and (eq_attr "type" "load,fpload")
+ (eq_attr "cpu" "7100"))
+ "i_7100+mem_7100")
+(define_insn_reservation "X4" 2
+ (and (eq_attr "type" "store,fpstore")
+ (eq_attr "cpu" "7100"))
+ "i_7100+mem_7100,mem_7100")
-;; Memory subsystem works just like 7100/7150 (except for cache miss times which
-;; we don't model here).
+(define_insn_reservation "X5" 1
+ (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl,load,fpload,store,fpstore")
+ (eq_attr "cpu" "7100"))
+ "i_7100")
;; The 7100LC has three floating-point units: ALU, MUL, and DIV.
-;; Note divides and sqrt flops lock the cpu until the flop is
-;; finished. fmpy and xmpyu (fmpyi) lock the cpu for one cycle.
-;; There's no way to avoid the penalty.
;; Timings:
;; Instruction Time Unit Minimum Distance (unit contention)
;; fcpy 2 ALU 1
;; fdiv,dbl 15 DIV 15
;; fsqrt,sgl 8 DIV 8
;; fsqrt,dbl 15 DIV 15
-
-(define_function_unit "pa7100LCfp_alu" 1 0
+;;
+;; The PA7200 is just like the PA7100LC except that there is
+;; no store-store penalty.
+;;
+;; The PA7300 is just like the PA7200 except that there is
+;; no store-load penalty.
+;;
+;; Note there are some aspects of the 7100LC we are not modeling
+;; at the moment. I'll be reviewing the 7100LC scheduling info
+;; shortly and updating this description.
+;;
+;; load-load pairs
+;; store-store pairs
+;; fmpyadd,dbl
+;; fmpysub,dbl
+;; other issue modeling
+
+(define_automaton "pa7100lc")
+(define_cpu_unit "i0_7100lc, i1_7100lc, f_7100lc" "pa7100lc")
+(define_cpu_unit "fpalu_7100lc,fpdivsqrt_7100lc,fpmul_7100lc" "pa7100lc")
+(define_cpu_unit "mem_7100lc" "pa7100lc")
+
+(define_insn_reservation "Y0" 2
(and (eq_attr "type" "fpcc,fpalu")
- (eq_attr "cpu" "7100LC,7200")) 2 1)
-(define_function_unit "pa7100LCfp_mpy" 1 0
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "f_7100lc,fpalu_7100lc")
+
+(define_insn_reservation "Y1" 2
(and (eq_attr "type" "fpmulsgl")
- (eq_attr "cpu" "7100LC,7200")) 2 1)
-(define_function_unit "pa7100LCfp_mpy" 1 0
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "f_7100lc,fpmul_7100lc")
+
+(define_insn_reservation "Y2" 3
(and (eq_attr "type" "fpmuldbl")
- (eq_attr "cpu" "7100LC,7200")) 3 2)
-(define_function_unit "pa7100LCfp_div" 1 0
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "f_7100lc,fpmul_7100lc,fpmul_7100lc")
+
+(define_insn_reservation "Y3" 8
(and (eq_attr "type" "fpdivsgl,fpsqrtsgl")
- (eq_attr "cpu" "7100LC,7200")) 8 8)
-(define_function_unit "pa7100LCfp_div" 1 0
- (and (eq_attr "type" "fpdivdbl,fpsqrtdbl")
- (eq_attr "cpu" "7100LC,7200")) 15 15)
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "f_7100lc+fpdivsqrt_7100lc,fpdivsqrt_7100lc*7")
-;; Define the various functional units for dual-issue.
+(define_insn_reservation "Y4" 15
+ (and (eq_attr "type" "fpdivdbl,fpsqrtdbl")
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "f_7100lc+fpdivsqrt_7100lc,fpdivsqrt_7100lc*14")
-;; There's only one floating point unit.
-(define_function_unit "pa7100LCflop" 1 1
- (and
- (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
- (eq_attr "cpu" "7100LC,7200")) 1 1)
+(define_insn_reservation "Y5" 2
+ (and (eq_attr "type" "load,fpload")
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "i1_7100lc+mem_7100lc")
-;; Shifts and memory ops execute in only one of the integer ALUs
-(define_function_unit "pa7100LCshiftmem" 1 1
- (and
- (eq_attr "type" "shift,nullshift,load,fpload,store,fpstore")
- (eq_attr "cpu" "7100LC,7200")) 1 1)
+(define_insn_reservation "Y6" 2
+ (and (eq_attr "type" "store,fpstore")
+ (eq_attr "cpu" "7100LC"))
+ "i1_7100lc+mem_7100lc,mem_7100lc")
-;; We have two basic ALUs.
-(define_function_unit "pa7100LCalu" 2 1
- (and
- (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
- (eq_attr "cpu" "7100LC,7200")) 1 1)
+(define_insn_reservation "Y7" 1
+ (and (eq_attr "type" "shift,nullshift")
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "i1_7100lc")
-;; I don't have complete information on the PA7200; however, most of
-;; what I've heard makes it look like a 7100LC without the store-store
-;; penalty. So that's how we'll model it.
+(define_insn_reservation "Y8" 1
+ (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl,load,fpload,store,fpstore,shift,nullshift")
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "(i0_7100lc|i1_7100lc)")
-;; Memory. Disregarding Cache misses, memory loads and stores take
-;; two cycles. Any special cases are handled in pa_adjust_cost.
-(define_function_unit "pa7200memory" 1 0
- (and (eq_attr "type" "load,fpload,store,fpstore")
- (eq_attr "cpu" "7200")) 2 0)
+;; The 7200 has a store-load penalty but no store-store penalty: the
+;; store holds the memory unit only in the cycle after it issues, so a
+;; following load (which needs mem_7100lc in its issue cycle) stalls
+;; while a following store does not.
+;; Stores, like loads and shifts, can only issue through i1_7100lc.
+(define_insn_reservation "Y9" 2
+ (and (eq_attr "type" "store,fpstore")
+ (eq_attr "cpu" "7200"))
+ "i1_7100lc,mem_7100lc")
-;; I don't have detailed information on the PA7200 FP pipeline, so I
-;; treat it just like the 7100LC pipeline.
-;; Similarly for the multi-issue fake units.
+;; The 7300 has no penalty for store-store or store-load, so a store
+;; reserves nothing beyond its issue slot.
+;; Stores, like loads and shifts, can only issue through i1_7100lc.
+(define_insn_reservation "YA" 2
+ (and (eq_attr "type" "store,fpstore")
+ (eq_attr "cpu" "7300"))
+ "i1_7100lc")
-;;
;; Scheduling for the PA8000 is somewhat different than scheduling for a
;; traditional architecture.
;;
;; The PA8000 has a large (56) entry reorder buffer that is split between
;; memory and non-memory operations.
;;
-;; The PA800 can issue two memory and two non-memory operations per cycle to
-;; the function units. Similarly, the PA8000 can retire two memory and two
-;; non-memory operations per cycle.
+;; The PA8000 can issue two memory and two non-memory operations per cycle to
+;; the function units, with the exception of branches and multi-output
+;; instructions. The PA8000 can retire two non-memory operations per cycle
+;; and two memory operations per cycle, only one of which may be a store.
;;
;; Given the large reorder buffer, the processor can hide most latencies.
;; According to HP, they've got the best results by scheduling for retirement
;; bandwidth with limited latency scheduling for floating point operations.
;; Latency for integer operations and memory references is ignored.
;;
-;; We claim floating point operations have a 2 cycle latency and are
-;; fully pipelined, except for div and sqrt which are not pipelined.
;;
-;; It is not necessary to define the shifter and integer alu units.
+;; We claim floating point operations have a 2 cycle latency and are
+;; fully pipelined, except for div and sqrt which are not pipelined and
+;; take from 17 to 31 cycles to complete.
;;
-;; These first two define_unit_unit descriptions model retirement from
-;; the reorder buffer.
-(define_function_unit "pa8000lsu" 2 1
+;; It's worth noting that there is no way to saturate all the functional
+;; units on the PA8000 as there is not enough issue bandwidth.
+
+(define_automaton "pa8000")
+(define_cpu_unit "inm0_8000, inm1_8000, im0_8000, im1_8000" "pa8000")
+(define_cpu_unit "rnm0_8000, rnm1_8000, rm0_8000, rm1_8000" "pa8000")
+(define_cpu_unit "store_8000" "pa8000")
+(define_cpu_unit "f0_8000, f1_8000" "pa8000")
+(define_cpu_unit "fdivsqrt0_8000, fdivsqrt1_8000" "pa8000")
+(define_reservation "inm_8000" "inm0_8000 | inm1_8000")
+(define_reservation "im_8000" "im0_8000 | im1_8000")
+(define_reservation "rnm_8000" "rnm0_8000 | rnm1_8000")
+(define_reservation "rm_8000" "rm0_8000 | rm1_8000")
+(define_reservation "f_8000" "f0_8000 | f1_8000")
+(define_reservation "fdivsqrt_8000" "fdivsqrt0_8000 | fdivsqrt1_8000")
+
+;; We can issue any two memops per cycle, but we can only retire
+;; one memory store per cycle. We assume that the reorder buffer
+;; will hide any memory latencies per HP's recommendation.
+(define_insn_reservation "Z0" 0
(and
- (eq_attr "type" "load,fpload,store,fpstore")
- (eq_attr "cpu" "8000")) 1 1)
+ (eq_attr "type" "load,fpload")
+ (eq_attr "cpu" "8000"))
+ "im_8000,rm_8000")
-(define_function_unit "pa8000alu" 2 1
+(define_insn_reservation "Z1" 0
(and
- (eq_attr "type" "!load,fpload,store,fpstore")
- (eq_attr "cpu" "8000")) 1 1)
-
-;; Claim floating point ops have a 2 cycle latency, excluding div and
-;; sqrt, which are not pipelined and issue to different units.
-(define_function_unit "pa8000fmac" 2 0
+ (eq_attr "type" "store,fpstore")
+ (eq_attr "cpu" "8000"))
+ "im_8000,rm_8000+store_8000")
+
+;; We can issue and retire two non-memory operations per cycle with
+;; a few exceptions (branches). This group catches those we want
+;; to assume have zero latency.
+(define_insn_reservation "Z2" 0
(and
- (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
- (eq_attr "cpu" "8000")) 2 1)
+ (eq_attr "type" "!load,fpload,store,fpstore,uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch,fpcc,fpalu,fpmulsgl,fpmuldbl,fpsqrtsgl,fpsqrtdbl,fpdivsgl,fpdivdbl")
+ (eq_attr "cpu" "8000"))
+ "inm_8000,rnm_8000")
-(define_function_unit "pa8000fdiv" 2 1
+;; Branches use both slots in the non-memory issue and
+;; retirement unit.
+(define_insn_reservation "Z3" 0
(and
- (eq_attr "type" "fpdivsgl,fpsqrtsgl")
- (eq_attr "cpu" "8000")) 17 17)
+ (eq_attr "type" "uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch")
+ (eq_attr "cpu" "8000"))
+ "inm0_8000+inm1_8000,rnm0_8000+rnm1_8000")
+
+;; We partially latency-schedule the floating point units.
+;; They can issue/retire two at a time in the non-memory
+;; units.  We fix their latency at 2 cycles and they
+;; are fully pipelined.
+;; NOTE(review): the reservation below declares a default latency of 1,
+;; not 2 -- confirm which value is intended.
+(define_insn_reservation "Z4" 1
+ (and
+ (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
+ (eq_attr "cpu" "8000"))
+ "inm_8000,f_8000,rnm_8000")
+
+;; The fdivsqrt units are not pipelined and have a very long latency.
+;; To keep the DFA from exploding, we do not show all the
+;; reservations for the divsqrt unit.
+(define_insn_reservation "Z5" 17
+ (and
+ (eq_attr "type" "fpdivsgl,fpsqrtsgl")
+ (eq_attr "cpu" "8000"))
+ "inm_8000,fdivsqrt_8000*6,rnm_8000")
+
+(define_insn_reservation "Z6" 31
+ (and
+ (eq_attr "type" "fpdivdbl,fpsqrtdbl")
+ (eq_attr "cpu" "8000"))
+ "inm_8000,fdivsqrt_8000*6,rnm_8000")
-(define_function_unit "pa8000fdiv" 2 1
- (and
- (eq_attr "type" "fpdivdbl,fpsqrtdbl")
- (eq_attr "cpu" "8000")) 31 31)
\f
;; Compare instructions.
static void sh_asm_named_section PARAMS ((const char *, unsigned int));
#endif
static int sh_adjust_cost PARAMS ((rtx, rtx, rtx, int));
+static int sh_use_dfa_interface PARAMS ((void));
+static int sh_issue_rate PARAMS ((void));
+
static bool sh_cannot_modify_jumps_p PARAMS ((void));
static bool sh_ms_bitfield_layout_p PARAMS ((tree));
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost
+#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
+#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
+ sh_use_dfa_interface
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
+
#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
}
+/* Return nonzero if the DFA based scheduler interface is to be used.
+   At present this is supported only for SH4.  Installed as
+   TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE.  */
+static int
+sh_use_dfa_interface()
+{
+  return TARGET_SH4 ? 1 : 0;
+}
+
+/* Return the issue rate: 2, signifying dual issue, for the SH4
+   processor and 1 otherwise.  Installed as TARGET_SCHED_ISSUE_RATE
+   for use with the DFA pipeline description.  */
+static int
+sh_issue_rate()
+{
+  return TARGET_SH4 ? 2 : 1;
+}
+
/* SHmedia requires registers for branches, so we can't generate new
branches past reload. */
static bool
"cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,other,load,load_si,store,move,fmove,smpy,dmpy,return,pload,prset,pstore,prget,pcload,pcload_si,pt,ptabs,rte,sfunc,call,fp,fdiv,dfp_arith,dfp_cmp,dfp_conv,dfdiv,gp_fpul,nil"
(const_string "other"))
+;; We define a new attribute, "insn_class", for use by the
+;; DFA based pipeline description.
+;; Although the "type" attribute covers almost all insn
+;; classes, it is more convenient to define a new attribute
+;; for certain reservations.
+;;
+;; mt_group SH4 "mt" group instructions.
+;;
+;; ex_group SH4 "ex" group instructions.  They mostly
+;; overlap with arithmetic instructions, but a
+;; new attribute is defined to distinguish them
+;; from mt group instructions.
+;;
+;; lds_to_fpscr The "type" attribute cannot sufficiently
+;; distinguish this insn from others, so it is
+;; part of the new attribute.  The same applies
+;; to ldsmem_to_fpscr and cwb.
+
+(define_attr "insn_class"
+ "mt_group,ex_group,lds_to_fpscr,ldsmem_to_fpscr,cwb,none"
+ (const_string "none"))
+
;; Indicate what precision must be selected in fpscr for this insn, if any.
(define_attr "fp_mode" "single,double,none" (const_string "none"))
(match_operand:SI 1 "arith_operand" "L,r"))
(const_int 0)))]
"TARGET_SH1"
- "tst %1,%0")
+ "tst %1,%0"
+ [(set_attr "insn_class" "mt_group")])
;; ??? Perhaps should only accept reg/constant if the register is reg 0.
;; That would still allow reload to create cmpi instructions, but would
"@
tst %0,%0
cmp/eq %1,%0
- cmp/eq %1,%0")
+ cmp/eq %1,%0"
+ [(set_attr "insn_class" "mt_group,mt_group,mt_group")])
(define_insn "cmpgtsi_t"
[(set (reg:SI T_REG)
"TARGET_SH1"
"@
cmp/gt %1,%0
- cmp/pl %0")
+ cmp/pl %0"
+ [(set_attr "insn_class" "mt_group,mt_group")])
(define_insn "cmpgesi_t"
[(set (reg:SI T_REG)
"TARGET_SH1"
"@
cmp/ge %1,%0
- cmp/pz %0")
-\f
+ cmp/pz %0"
+ [(set_attr "insn_class" "mt_group,mt_group")])
+
;; -------------------------------------------------------------------------
;; SImode unsigned integer comparisons
;; -------------------------------------------------------------------------
(geu:SI (match_operand:SI 0 "arith_reg_operand" "r")
(match_operand:SI 1 "arith_reg_operand" "r")))]
"TARGET_SH1"
- "cmp/hs %1,%0")
+ "cmp/hs %1,%0"
+ [(set_attr "insn_class" "mt_group")])
(define_insn "cmpgtusi_t"
[(set (reg:SI T_REG)
(gtu:SI (match_operand:SI 0 "arith_reg_operand" "r")
(match_operand:SI 1 "arith_reg_operand" "r")))]
"TARGET_SH1"
- "cmp/hi %1,%0")
+ "cmp/hi %1,%0"
+ [(set_attr "insn_class" "mt_group")])
;; We save the compare operands in the cmpxx patterns and use them when
;; we generate the branch.
(ltu:SI (plus:SI (match_dup 1) (match_dup 2)) (match_dup 1)))]
"TARGET_SH1"
"addc %2,%0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_insn "addc1"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(clobber (reg:SI T_REG))]
"TARGET_SH1"
"addc %2,%0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_expand "addsi3"
[(set (match_operand:SI 0 "arith_reg_operand" "")
(match_operand:SI 2 "arith_operand" "rI")))]
"TARGET_SH1"
"add %2,%0"
- [(set_attr "type" "arith")])
-\f
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
+
;; -------------------------------------------------------------------------
;; Subtraction instructions
;; -------------------------------------------------------------------------
(gtu:SI (minus:SI (match_dup 1) (match_dup 2)) (match_dup 1)))]
"TARGET_SH1"
"subc %2,%0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_insn "subc1"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(clobber (reg:SI T_REG))]
"TARGET_SH1"
"subc %2,%0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_insn "*subsi3_internal"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(match_operand:SI 2 "arith_reg_operand" "r")))]
"TARGET_SH1"
"sub %2,%0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_insn "*subsi3_media"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(match_operand:SI 2 "logical_operand" "r,L")))]
"TARGET_SH1"
"and %2,%0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
;; If the constant is 255, then emit a extu.b instruction instead of an
;; and, since that will give better code.
(match_operand:SI 2 "logical_operand" "r,L")))]
"TARGET_SH1"
"or %2,%0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_insn "iordi3"
[(set (match_operand:DI 0 "arith_reg_operand" "=r,r")
(match_operand:SI 2 "logical_operand" "L,r")))]
"TARGET_SH1"
"xor %2,%0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_insn "xordi3"
[(set (match_operand:DI 0 "arith_reg_operand" "=r,r")
(lshiftrt:SI (match_dup 1) (const_int 31)))]
"TARGET_SH1"
"rotl %0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_insn "rotlsi3_31"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(clobber (reg:SI T_REG))]
"TARGET_SH1"
"rotr %0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_insn "rotlsi3_16"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(const_int 16)))]
"TARGET_SH1"
"swap.w %1,%0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_expand "rotlsi3"
[(set (match_operand:SI 0 "arith_reg_operand" "")
(const_int 8)))]
"TARGET_SH1"
"swap.b %1,%0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_expand "rotlhi3"
[(set (match_operand:HI 0 "arith_reg_operand" "")
(clobber (match_dup 4))])]
"operands[4] = gen_rtx_SCRATCH (SImode);"
[(set_attr "length" "*,*,*,4")
- (set_attr "type" "dyn_shift,arith,arith,arith")])
+ (set_attr "type" "dyn_shift,arith,arith,arith")
+ (set_attr "insn_class" "ex_group,ex_group,ex_group,ex_group")])
(define_insn "ashlhi3_k"
[(set (match_operand:HI 0 "arith_reg_operand" "=r,r")
"@
add %0,%0
shll%O2 %0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_insn "ashlsi3_n"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(eq (symbol_ref "shift_insns_rtx (insn)") (const_int 3))
(const_string "6")]
(const_string "8")))
- (set_attr "type" "arith")])
+ (set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_split
[(set (match_operand:SI 0 "arith_reg_operand" "")
(clobber (reg:SI T_REG))]
"TARGET_SH1 && INTVAL (operands[2]) == 1"
"shar %0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
;; We can't do HImode right shifts correctly unless we start out with an
;; explicit zero / sign extension; doing that would result in worse overall
(lt:SI (match_dup 1) (const_int 0)))]
"TARGET_SH1"
"shll %0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_insn "ashrsi3_d"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
"TARGET_SH3"
"shad %2,%0"
- [(set_attr "type" "dyn_shift")])
+ [(set_attr "type" "dyn_shift")
+ (set_attr "insn_class" "ex_group")])
(define_insn "ashrsi3_n"
[(set (reg:SI R4_REG)
(neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
"TARGET_SH3"
"shld %2,%0"
- [(set_attr "type" "dyn_shift")])
+ [(set_attr "type" "dyn_shift")
+ (set_attr "insn_class" "ex_group")])
;; Only the single bit shift clobbers the T bit.
(clobber (reg:SI T_REG))]
"TARGET_SH1 && CONST_OK_FOR_M (INTVAL (operands[2]))"
"shlr %0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_insn "lshrsi3_k"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
"TARGET_SH1 && CONST_OK_FOR_K (INTVAL (operands[2]))
&& ! CONST_OK_FOR_M (INTVAL (operands[2]))"
"shlr%O2 %0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_insn "lshrsi3_n"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
"TARGET_SH1"
"shll %R0\;rotcl %S0"
[(set_attr "length" "4")
- (set_attr "type" "arith")])
+ (set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_insn "ashldi3_media"
[(set (match_operand:DI 0 "arith_reg_operand" "=r,r")
"TARGET_SH1"
"shlr %S0\;rotcr %R0"
[(set_attr "length" "4")
- (set_attr "type" "arith")])
+ (set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_insn "lshrdi3_media"
[(set (match_operand:DI 0 "arith_reg_operand" "=r,r")
"TARGET_SH1"
"shar %S0\;rotcr %R0"
[(set_attr "length" "4")
- (set_attr "type" "arith")])
+ (set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_insn "ashrdi3_media"
[(set (match_operand:DI 0 "arith_reg_operand" "=r,r")
(const_int 16))))]
"TARGET_SH1"
"xtrct %1,%0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_insn "xtrct_right"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(const_int 16))))]
"TARGET_SH1"
"xtrct %2,%0"
- [(set_attr "type" "arith")])
-\f
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
+
;; -------------------------------------------------------------------------
;; Unary arithmetic
;; -------------------------------------------------------------------------
(const_int 0)))]
"TARGET_SH1"
"negc %1,%0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_insn "*negdi_media"
[(set (match_operand:DI 0 "arith_reg_operand" "=r")
(neg:SI (match_operand:SI 1 "arith_reg_operand" "r")))]
"TARGET_SH1"
"neg %1,%0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_insn "one_cmplsi2"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(not:SI (match_operand:SI 1 "arith_reg_operand" "r")))]
"TARGET_SH1"
"not %1,%0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_expand "one_cmpldi2"
[(set (match_operand:DI 0 "arith_reg_operand" "")
(zero_extend:SI (match_operand:HI 1 "arith_reg_operand" "r")))]
"TARGET_SH1"
"extu.w %1,%0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_insn "zero_extendqisi2"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(zero_extend:SI (match_operand:QI 1 "arith_reg_operand" "r")))]
"TARGET_SH1"
"extu.b %1,%0"
- [(set_attr "type" "arith")])
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
(define_insn "zero_extendqihi2"
[(set (match_operand:HI 0 "arith_reg_operand" "=r")
(zero_extend:HI (match_operand:QI 1 "arith_reg_operand" "r")))]
"TARGET_SH1"
"extu.b %1,%0"
- [(set_attr "type" "arith")])
-\f
+ [(set_attr "type" "arith")
+ (set_attr "insn_class" "ex_group")])
+
;; -------------------------------------------------------------------------
;; Sign extension instructions
;; -------------------------------------------------------------------------
"@
exts.w %1,%0
mov.w %1,%0"
- [(set_attr "type" "arith,load")])
+ [(set_attr "type" "arith,load")
+ (set_attr "insn_class" "ex_group,*")])
(define_insn "extendqisi2"
[(set (match_operand:SI 0 "arith_reg_operand" "=r,r")
"@
exts.b %1,%0
mov.b %1,%0"
- [(set_attr "type" "arith,load")])
+ [(set_attr "type" "arith,load")
+ (set_attr "insn_class" "ex_group,*")])
(define_insn "extendqihi2"
[(set (match_operand:HI 0 "arith_reg_operand" "=r,r")
"@
exts.b %1,%0
mov.b %1,%0"
- [(set_attr "type" "arith,load")])
-\f
+ [(set_attr "type" "arith,load")
+ (set_attr "insn_class" "ex_group,*")])
+
;; -------------------------------------------------------------------------
;; Move instructions
;; -------------------------------------------------------------------------
lds.l %1,%0
fake %1,%0"
[(set_attr "type" "pcload_si,move,*,load_si,move,prget,move,store,store,pstore,move,prset,load,pload,pcload_si")
+ (set_attr "insn_class" "*,*,mt_group,*,*,*,*,*,*,*,*,*,*,*,*")
(set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")])
;; t/r must come after r/r, lest reload will try to reload stuff like
(clobber (match_scratch:SI 2 "=&r"))]
"TARGET_HARD_SH4"
"ocbwb\\t@%0\;extu.w\\t%0,%2\;or\\t%1,%2\;mov.l\\t%0,@%2"
- [(set_attr "length" "8")])
+ [(set_attr "length" "8")
+ (set_attr "insn_class" "cwb")])
(define_insn "ic_invalidate_line_media"
[(unspec_volatile [(match_operand 0 "register_operand" "r")]
mov.l %1,%0
sts fpscr,%0"
[(set_attr "length" "0,2,2,4,2,2,2,2")
- (set_attr "type" "dfp_conv,dfp_conv,load,dfp_conv,dfp_conv,move,store,gp_fpul")])
+ (set_attr "type" "dfp_conv,dfp_conv,load,dfp_conv,dfp_conv,move,store,gp_fpul")
+ (set_attr "insn_class" "ldsmem_to_fpscr,*,*,lds_to_fpscr,*,*,*,*")])
(define_split
[(set (reg:PSI FPSCR_REG)
"TARGET_SH1"
"mov.l @r15+,r15\;mov.l @r15+,r0"
[(set_attr "length" "4")])
+
+;; The following description models the
+;; SH4 pipeline using the DFA based scheduler.
+;; The DFA based description is a better way to model
+;; a superscalar pipeline than the function unit
+;; reservation model.
+;; 1. The function unit based model is oriented towards describing at most one
+;; unit reservation by each insn.  It is difficult to model unit reservations in multiple
+;; pipeline units by the same insn.  This can be done using a DFA based description.
+;; 2. The execution performance of the DFA based scheduler does not depend on processor complexity.
+;; 3. Writing all unit reservations for an instruction class is a more natural description
+;; of the pipeline and makes the interface of the hazard recognizer simpler than the
+;; old function unit based model.
+;; 4. The DFA model is richer and is a part of the greater overall framework of RCSP.
+
+
+;; Two automata are defined to reduce the number of states
+;; which a single large automaton would have (factoring).
+
+(define_automaton "inst_pipeline,fpu_pipe")
+
+;; These units are basically the decode unit of the processor.
+;; Since SH4 is a dual issue machine, it is as if there are two
+;; units, so that any insn can be processed by either one
+;; of the decoding units.
+
+(define_cpu_unit "pipe_01,pipe_02" "inst_pipeline")
+
+
+;; The fixed point arithmetic calculator(?? EX Unit).
+
+(define_cpu_unit "int" "inst_pipeline")
+
+;; f1_1 and f1_2 are floating point units.  Actually there is
+;; an f1 unit which can overlap with another f1 unit but
+;; not with the F1 unit.  It is as though there were two
+;; f1 units.
+
+(define_cpu_unit "f1_1,f1_2" "fpu_pipe")
+
+;; The floating point units.
+
+(define_cpu_unit "F1,F2,F3,FS" "fpu_pipe")
+
+;; This is basically the MA unit of SH4
+;; used in LOAD/STORE pipeline.
+
+(define_cpu_unit "memory" "inst_pipeline")
+
+;; The address calculator used for branch instructions.
+;; This will be reserved with "issue" of branch instructions
+;; and this is to make sure that no two branch instructions
+;; can be issued in parallel.
+
+(define_cpu_unit "pcr_addrcalc" "inst_pipeline")
+
+;; ----------------------------------------------------
+;; This reservation is to simplify the dual issue description.
+
+(define_reservation "issue" "pipe_01|pipe_02")
+
+;; This is to express the locking of D stage.
+
+(define_reservation "d_lock" "pipe_01+pipe_02")
+
+;; This is to simplify description where F1,F2,FS
+;; are used simultaneously.
+
+(define_reservation "fpu" "F1+F2+FS")
+
+;; This is to highlight the fact that f1
+;; cannot overlap with F1.
+
+(exclusion_set "f1_1,f1_2" "F1")
+
+;; Although reg moves have a latency of zero
+;; we need to highlight that they use D stage
+;; for one cycle.
+
+(define_insn_reservation "reg_mov" 0
+ (eq_attr "type" "move,fmove")
+ "issue")
+
+;; Other MT group instructions (1 step operations)
+;; Group: MT
+;; Latency: 1
+;; Issue Rate: 1
+
+(define_insn_reservation "mt" 1
+ (eq_attr "insn_class" "mt_group")
+ "issue,nothing")
+
+;; Fixed Point Arithmetic Instructions(1 step operations)
+;; Group: EX
+;; Latency: 1
+;; Issue Rate: 1
+
+(define_insn_reservation "simple_arith" 1
+ (eq_attr "insn_class" "ex_group")
+ "issue,int")
+
+;; Load/Store instructions (MOV.[BWL]@(d,GBR))
+;; Group: LS
+;; Latency: 2
+;; Issue Rate: 1
+
+(define_insn_reservation "load_store" 2
+ (eq_attr "type" "load,load_si,pcload,pcload_si,store")
+ "issue,memory*2")
+
+;; Branch (BF,BF/S,BT,BT/S,BRA)
+;; Group: BR
+;; Latency: 2 (or 1) Actually Observed to be 5/7
+;; Issue Rate: 1
+;; The latency is 1 when displacement is 0.
+;; This reservation can be further broken into 2
+;; 1. branch_zero : One with latency 1 and in the TEST
+;; part it also checks for 0 (ZERO) displacement
+;; 2. branch: Latency 2.
+
+(define_insn_reservation "branch_zero" 5
+ (and (eq_attr "type" "cbranch")
+ (eq_attr "length" "2"))
+ "(issue+pcr_addrcalc),pcr_addrcalc,nothing")
+
+(define_insn_reservation "branch" 7
+ (eq_attr "type" "cbranch")
+ "(issue+pcr_addrcalc),pcr_addrcalc,nothing")
+
+;; Branch Far (JMP,RTS,BRAF)
+;; Group: CO
+;; Latency: 3
+;; Issue Rate: 2
+;; Since issue stage (D stage) is blocked for 2nd cycle,
+;; cpu_unit int is reserved since it might be required for far
+;; address calculation.
+
+(define_insn_reservation "branch_far" 12
+ (and (eq_attr "type" "jump,return")
+ (eq_attr "length" "6"))
+ "d_lock*2,int+pcr_addrcalc,pcr_addrcalc")
+
+;; RTE
+;; Group: CO
+;; Latency: 5
+;; Issue Rate: 5
+;; This instruction can be executed in any of the pipelines
+;; and blocks the pipeline for the next 4 stages.
+
+(define_insn_reservation "return_from_exp" 5
+ (eq_attr "type" "rte")
+ "(issue+pcr_addrcalc),d_lock*4,int+pcr_addrcalc,nothing")
+
+;; OCBP, OCBWB
+;; Group: CO
+;; Latency: 5
+;; Issue Rate: 1
+
+(define_insn_reservation "ocbwb" 5
+ (eq_attr "insn_class" "cwb")
+ "issue,(int+memory),memory*5")
+
+;; LDS to PR,JSR
+;; Group: CO
+;; Latency: 3
+;; Issue Rate: 2
+;; The SX stage is blocked for last 2 cycles.
+
+(define_insn_reservation "lds_to_pr" 3
+ (eq_attr "type" "prset,call,sfunc")
+ "(issue+pcr_addrcalc),(issue+int+pcr_addrcalc),(int+pcr_addrcalc)*2")
+
+;; LDS.L to PR
+;; Group: CO
+;; Latency: 3
+;; Issue Rate: 2
+;; The SX unit is blocked for last 2 cycles.
+
+(define_insn_reservation "ldsmem_to_pr" 3
+ (eq_attr "type" "pload")
+ "(issue+pcr_addrcalc),(issue+int+pcr_addrcalc),(int+memory+pcr_addrcalc),(int+pcr_addrcalc)")
+
+;; STS from PR
+;; Group: CO
+;; Latency: 2
+;; Issue Rate: 2
+;; The SX unit is used in the second and third cycles.
+
+(define_insn_reservation "sts_from_pr" 2
+ (eq_attr "type" "prget")
+ "(issue+pcr_addrcalc),(pipe_01+int+pcr_addrcalc),(int+pcr_addrcalc),nothing")
+
+;; STS.L from PR
+;; Group: CO
+;; Latency: 2
+;; Issue Rate: 2
+
+(define_insn_reservation "prload_mem" 2
+ (eq_attr "type" "pstore")
+ "(issue+pcr_addrcalc),(pipe_01+int+pcr_addrcalc),(int+memory+pcr_addrcalc),memory")
+
+;; LDS to FPSCR
+;; Group: CO
+;; Latency: 4
+;; Issue Rate: 1
+;; F1 is blocked for last three cycles.
+
+(define_insn_reservation "fpscr_store" 4
+ (eq_attr "insn_class" "lds_to_fpscr")
+ "issue,int,F1*3")
+
+;; LDS.L to FPSCR
+;; Group: CO
+;; Latency: 1 / 4
+;; Latency to update Rn is 1 and latency to update FPSCR is 4
+;; Issue Rate: 1
+;; F1 is blocked for last three cycles.
+
+(define_insn_reservation "fpscr_store_mem" 4
+ (eq_attr "insn_class" "ldsmem_to_fpscr")
+ "issue,(int+memory),(F1+memory),F1*2")
+
+\f
+;; Fixed point multiplication (DMULS.L DMULU.L MUL.L MULS.W,MULU.W)
+;; Group: CO
+;; Latency: 4 / 4
+;; Issue Rate: 1
+
+(define_insn_reservation "multi" 4
+ (eq_attr "type" "smpy,dmpy")
+ "issue,(issue+int+f1_1),(int+f1_1),(f1_1|f1_2)*2,F2,FS")
+
+
+;; Single precision floating point computation FCMP/EQ,
+;; FCMP/GT, FADD, FLOAT, FMAC, FMUL, FSUB, FTRC, FRCHG, FSCHG
+;; Group: FE
+;; Latency: 4
+;; Issue Rate: 1
+
+(define_insn_reservation "fp_arith" 4
+ (eq_attr "type" "fp")
+ "issue,F1,F2,FS")
+
+;; Single Precision FDIV/SQRT
+;; Group: FE
+;; Latency: 12/13
+;; Issue Rate: 1
+
+(define_insn_reservation "fp_div" 13
+ (eq_attr "type" "fdiv")
+ "issue,F1+F3,F1+F2+F3,F3*7,F1+F3,F2,FS")
+
+;; Double Precision floating point computation
+;; (FCNVDS, FCNVSD, FLOAT, FTRC)
+;; Group: FE
+;; Latency: (3,4)/5
+;; Issue Rate: 1
+
+(define_insn_reservation "dp_float" 5
+ (eq_attr "type" "dfp_conv")
+ "issue,F1,F1+F2,F2+FS,FS")
+
+;; Double-precision floating-point (FADD ,FMUL,FSUB)
+;; Group: FE
+;; Latency: (7,8)/9
+;; Issue Rate: 1
+
+(define_insn_reservation "fp_double_arith" 9
+ (eq_attr "type" "dfp_arith")
+ "issue,F1,F1+F2,fpu*4,F2+FS,FS")
+
+;; Double-precision FCMP (FCMP/EQ,FCMP/GT)
+;; Group: FE
+;; Latency: 3/5
+;; Issue Rate: 2
+
+(define_insn_reservation "fp_double_cmp" 5
+ (eq_attr "type" "dfp_cmp")
+ "issue,(issue+F1),F1+F2,F2+FS,FS")
+
+;; Double precision FDIV/SQRT
+;; Group: FE
+;; Latency: (24,25)/26
+;; Issue Rate: 1
+
+(define_insn_reservation "dp_div" 26
+ (eq_attr "type" "dfdiv")
+ "issue,F1+F3,F1+F2+F3,F2+F3+FS,F3*16,F1+F3,F1+F2+F3,fpu+F3,F2+FS,FS")
+
32 bits of REG are 0 before INSN. */
extern int sparc_check_64 PARAMS ((rtx, rtx));
extern rtx gen_df_reg PARAMS ((rtx, int));
+/* Used for DFA scheduling when cpu is ultrasparc. */
+extern int ultrasparc_store_bypass_p PARAMS ((rtx, rtx));
extern int sparc_extra_constraint_check PARAMS ((rtx, int, int));
#endif /* RTX_CODE */
static int supersparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static int hypersparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
-static int ultrasparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void sparc_output_addr_vec PARAMS ((rtx));
static void sparc_output_addr_diff_vec PARAMS ((rtx));
static void sparc_output_deferred_case_vectors PARAMS ((void));
static void sparc_add_gc_roots PARAMS ((void));
-static void mark_ultrasparc_pipeline_state PARAMS ((void *));
static int check_return_regs PARAMS ((rtx));
static int epilogue_renumber PARAMS ((rtx *, int));
static bool sparc_assemble_integer PARAMS ((rtx, unsigned int, int));
-static int ultra_cmove_results_ready_p PARAMS ((rtx));
-static int ultra_fpmode_conflict_exists PARAMS ((enum machine_mode));
-static rtx *ultra_find_type PARAMS ((int, rtx *, int));
-static void ultra_build_types_avail PARAMS ((rtx *, int));
-static void ultra_flush_pipeline PARAMS ((void));
-static void ultra_rescan_pipeline_state PARAMS ((rtx *, int));
static int set_extends PARAMS ((rtx));
static void output_restore_regs PARAMS ((FILE *, int));
static void sparc_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
#ifdef OBJECT_FORMAT_ELF
static void sparc_elf_asm_named_section PARAMS ((const char *, unsigned int));
#endif
-static void ultrasparc_sched_reorder PARAMS ((FILE *, int, rtx *, int));
-static int ultrasparc_variable_issue PARAMS ((rtx));
-static void ultrasparc_sched_init PARAMS ((void));
static int sparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static int sparc_issue_rate PARAMS ((void));
-static int sparc_variable_issue PARAMS ((FILE *, int, rtx, int));
static void sparc_sched_init PARAMS ((FILE *, int, int));
-static int sparc_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
+static int sparc_use_dfa_pipeline_interface PARAMS ((void));
+static int sparc_use_sched_lookahead PARAMS ((void));
+static rtx sparc_cycle_display PARAMS ((int, rtx));
\f
/* Option handling. */
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
-#undef TARGET_SCHED_VARIABLE_ISSUE
-#define TARGET_SCHED_VARIABLE_ISSUE sparc_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
-#undef TARGET_SCHED_REORDER
-#define TARGET_SCHED_REORDER sparc_sched_reorder
+#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
+#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE sparc_use_dfa_pipeline_interface
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
+#undef TARGET_SCHED_CYCLE_DISPLAY
+#define TARGET_SCHED_CYCLE_DISPLAY sparc_cycle_display
struct gcc_target targetm = TARGET_INITIALIZER;
\f
{ TARGET_CPU_supersparc, "supersparc" },
{ TARGET_CPU_v9, "v9" },
{ TARGET_CPU_ultrasparc, "ultrasparc" },
+ { TARGET_CPU_ultrasparc3, "ultrasparc3" },
{ 0, 0 }
};
const struct cpu_default *def;
/* Although insns using %y are deprecated, it is a clear win on current
ultrasparcs. */
|MASK_DEPRECATED_V8_INSNS},
+ /* TI ultrasparc III */
+ /* ??? Check if %y issue still holds true in ultra3. */
+ { "ultrasparc3", PROCESSOR_ULTRASPARC3, MASK_ISA, MASK_V9|MASK_DEPRECATED_V8_INSNS},
{ 0, 0, 0, 0 }
};
const struct cpu_table *cpu;
target_flags &= ~MASK_STACK_BIAS;
/* Supply a default value for align_functions. */
- if (align_functions == 0 && sparc_cpu == PROCESSOR_ULTRASPARC)
+ if (align_functions == 0
+ && (sparc_cpu == PROCESSOR_ULTRASPARC
+ || sparc_cpu == PROCESSOR_ULTRASPARC3))
align_functions = 32;
/* Validate PCC_STRUCT_RETURN. */
emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), fnaddr);
emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, tramp))));
- if (sparc_cpu != PROCESSOR_ULTRASPARC)
+ if (sparc_cpu != PROCESSOR_ULTRASPARC
+ && sparc_cpu != PROCESSOR_ULTRASPARC3)
emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8)))));
}
\f
}
static int
-ultrasparc_adjust_cost (insn, link, dep_insn, cost)
- rtx insn;
- rtx link;
- rtx dep_insn;
- int cost;
-{
- enum attr_type insn_type, dep_type;
- rtx pat = PATTERN(insn);
- rtx dep_pat = PATTERN (dep_insn);
-
- if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
- return cost;
-
- insn_type = get_attr_type (insn);
- dep_type = get_attr_type (dep_insn);
-
- /* Nothing issues in parallel with integer multiplies, so
- mark as zero cost since the scheduler can not do anything
- about it. */
- if (insn_type == TYPE_IMUL || insn_type == TYPE_IDIV)
- return 0;
-
-#define SLOW_FP(dep_type) \
-(dep_type == TYPE_FPSQRTS || dep_type == TYPE_FPSQRTD || \
- dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)
-
- switch (REG_NOTE_KIND (link))
- {
- case 0:
- /* Data dependency; DEP_INSN writes a register that INSN reads some
- cycles later. */
-
- if (dep_type == TYPE_CMOVE)
- {
- /* Instructions that read the result of conditional moves cannot
- be in the same group or the following group. */
- return cost + 1;
- }
-
- switch (insn_type)
- {
- /* UltraSPARC can dual issue a store and an instruction setting
- the value stored, except for divide and square root. */
- case TYPE_FPSTORE:
- if (! SLOW_FP (dep_type))
- return 0;
- return cost;
-
- case TYPE_STORE:
- if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
- return cost;
-
- if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
- /* The dependency between the two instructions is on the data
- that is being stored. Assume that the address of the store
- is not also dependent. */
- return 0;
- return cost;
-
- case TYPE_LOAD:
- case TYPE_SLOAD:
- case TYPE_FPLOAD:
- /* A load does not return data until at least 11 cycles after
- a store to the same location. 3 cycles are accounted for
- in the load latency; add the other 8 here. */
- if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
- {
- /* If the addresses are not equal this may be a false
- dependency because pointer aliasing could not be
- determined. Add only 2 cycles in that case. 2 is
- an arbitrary compromise between 8, which would cause
- the scheduler to generate worse code elsewhere to
- compensate for a dependency which might not really
- exist, and 0. */
- if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
- || GET_CODE (SET_SRC (pat)) != MEM
- || GET_CODE (SET_DEST (dep_pat)) != MEM
- || ! rtx_equal_p (XEXP (SET_SRC (pat), 0),
- XEXP (SET_DEST (dep_pat), 0)))
- return cost + 2;
-
- return cost + 8;
- }
- return cost;
-
- case TYPE_BRANCH:
- /* Compare to branch latency is 0. There is no benefit from
- separating compare and branch. */
- if (dep_type == TYPE_COMPARE)
- return 0;
- /* Floating point compare to branch latency is less than
- compare to conditional move. */
- if (dep_type == TYPE_FPCMP)
- return cost - 1;
- return cost;
-
- case TYPE_FPCMOVE:
- /* FMOVR class instructions can not issue in the same cycle
- or the cycle after an instruction which writes any
- integer register. Model this as cost 2 for dependent
- instructions. */
- if (dep_type == TYPE_IALU
- && cost < 2)
- return 2;
- /* Otherwise check as for integer conditional moves. */
-
- case TYPE_CMOVE:
- /* Conditional moves involving integer registers wait until
- 3 cycles after loads return data. The interlock applies
- to all loads, not just dependent loads, but that is hard
- to model. */
- if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
- return cost + 3;
- return cost;
-
- default:
- break;
- }
- break;
-
- case REG_DEP_ANTI:
- /* Divide and square root lock destination registers for full latency. */
- if (! SLOW_FP (dep_type))
- return 0;
- break;
-
- case REG_DEP_OUTPUT:
- /* IEU and FPU instruction that have the same destination
- register cannot be grouped together. */
- return cost + 1;
-
- default:
- break;
- }
-
- /* Other costs not accounted for:
- - Single precision floating point loads lock the other half of
- the even/odd register pair.
- - Several hazards associated with ldd/std are ignored because these
- instructions are rarely generated for V9.
- - The floating point pipeline can not have both a single and double
- precision operation active at the same time. Format conversions
- and graphics instructions are given honorary double precision status.
- - call and jmpl are always the first instruction in a group. */
-
- return cost;
-
-#undef SLOW_FP
-}
-
-static int
sparc_adjust_cost(insn, link, dep, cost)
rtx insn;
rtx link;
case PROCESSOR_SPARCLITE86X:
cost = hypersparc_adjust_cost (insn, link, dep, cost);
break;
- case PROCESSOR_ULTRASPARC:
- cost = ultrasparc_adjust_cost (insn, link, dep, cost);
- break;
default:
break;
}
return cost;
}
-/* This describes the state of the UltraSPARC pipeline during
- instruction scheduling. */
-
-#define TMASK(__x) ((unsigned)1 << ((int)(__x)))
-#define UMASK(__x) ((unsigned)1 << ((int)(__x)))
-
-enum ultra_code { NONE=0, /* no insn at all */
- IEU0, /* shifts and conditional moves */
- IEU1, /* condition code setting insns, calls+jumps */
- IEUN, /* all other single cycle ieu insns */
- LSU, /* loads and stores */
- CTI, /* branches */
- FPM, /* FPU pipeline 1, multiplies and divides */
- FPA, /* FPU pipeline 2, all other operations */
- SINGLE, /* single issue instructions */
- NUM_ULTRA_CODES };
-
-static enum ultra_code ultra_code_from_mask PARAMS ((int));
-static void ultra_schedule_insn PARAMS ((rtx *, rtx *, int, enum ultra_code));
-
-static const char *const ultra_code_names[NUM_ULTRA_CODES] = {
- "NONE", "IEU0", "IEU1", "IEUN", "LSU", "CTI",
- "FPM", "FPA", "SINGLE" };
-
-struct ultrasparc_pipeline_state {
- /* The insns in this group. */
- rtx group[4];
-
- /* The code for each insn. */
- enum ultra_code codes[4];
-
- /* Which insns in this group have been committed by the
- scheduler. This is how we determine how many more
- can issue this cycle. */
- char commit[4];
-
- /* How many insns in this group. */
- char group_size;
-
- /* Mask of free slots still in this group. */
- char free_slot_mask;
-
- /* The slotter uses the following to determine what other
- insn types can still make their way into this group. */
- char contents [NUM_ULTRA_CODES];
- char num_ieu_insns;
-};
-
-#define ULTRA_NUM_HIST 8
-static struct ultrasparc_pipeline_state ultra_pipe_hist[ULTRA_NUM_HIST];
-static int ultra_cur_hist;
-static int ultra_cycles_elapsed;
-
-#define ultra_pipe (ultra_pipe_hist[ultra_cur_hist])
-
-/* Given TYPE_MASK compute the ultra_code it has. */
-static enum ultra_code
-ultra_code_from_mask (type_mask)
- int type_mask;
-{
- if (type_mask & (TMASK (TYPE_SHIFT) | TMASK (TYPE_CMOVE)))
- return IEU0;
- else if (type_mask & (TMASK (TYPE_COMPARE) |
- TMASK (TYPE_CALL) |
- TMASK (TYPE_SIBCALL) |
- TMASK (TYPE_UNCOND_BRANCH)))
- return IEU1;
- else if (type_mask & TMASK (TYPE_IALU))
- return IEUN;
- else if (type_mask & (TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
- TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
- TMASK (TYPE_FPSTORE)))
- return LSU;
- else if (type_mask & (TMASK (TYPE_FPMUL) | TMASK (TYPE_FPDIVS) |
- TMASK (TYPE_FPDIVD) | TMASK (TYPE_FPSQRTS) |
- TMASK (TYPE_FPSQRTD)))
- return FPM;
- else if (type_mask & (TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
- TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)))
- return FPA;
- else if (type_mask & TMASK (TYPE_BRANCH))
- return CTI;
-
- return SINGLE;
-}
-
-/* Check INSN (a conditional move) and make sure that it's
- results are available at this cycle. Return 1 if the
- results are in fact ready. */
-static int
-ultra_cmove_results_ready_p (insn)
- rtx insn;
+static void
+sparc_sched_init (dump, sched_verbose, max_ready)
+ FILE *dump ATTRIBUTE_UNUSED;
+ int sched_verbose ATTRIBUTE_UNUSED;
+ int max_ready ATTRIBUTE_UNUSED;
{
- struct ultrasparc_pipeline_state *up;
- int entry, slot;
-
- /* If this got dispatched in the previous
- group, the results are not ready. */
- entry = (ultra_cur_hist - 1) & (ULTRA_NUM_HIST - 1);
- up = &ultra_pipe_hist[entry];
- slot = 4;
- while (--slot >= 0)
- if (up->group[slot] == insn)
- return 0;
-
- return 1;
}
-
-/* Walk backwards in pipeline history looking for FPU
- operations which use a mode different than FPMODE and
- will create a stall if an insn using FPMODE were to be
- dispatched this cycle. */
+
static int
-ultra_fpmode_conflict_exists (fpmode)
- enum machine_mode fpmode;
+sparc_use_dfa_pipeline_interface ()
{
- int hist_ent;
- int hist_lim;
-
- hist_ent = (ultra_cur_hist - 1) & (ULTRA_NUM_HIST - 1);
- if (ultra_cycles_elapsed < 4)
- hist_lim = ultra_cycles_elapsed;
- else
- hist_lim = 4;
- while (hist_lim > 0)
- {
- struct ultrasparc_pipeline_state *up = &ultra_pipe_hist[hist_ent];
- int slot = 4;
-
- while (--slot >= 0)
- {
- rtx insn = up->group[slot];
- enum machine_mode this_mode;
- rtx pat;
-
- if (! insn
- || GET_CODE (insn) != INSN
- || (pat = PATTERN (insn)) == 0
- || GET_CODE (pat) != SET)
- continue;
-
- this_mode = GET_MODE (SET_DEST (pat));
- if ((this_mode != SFmode
- && this_mode != DFmode)
- || this_mode == fpmode)
- continue;
-
- /* If it is not FMOV, FABS, FNEG, FDIV, or FSQRT then
- we will get a stall. Loads and stores are independent
- of these rules. */
- if (GET_CODE (SET_SRC (pat)) != ABS
- && GET_CODE (SET_SRC (pat)) != NEG
- && ((TMASK (get_attr_type (insn)) &
- (TMASK (TYPE_FPDIVS) | TMASK (TYPE_FPDIVD) |
- TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPSQRTS) |
- TMASK (TYPE_FPSQRTD) |
- TMASK (TYPE_LOAD) | TMASK (TYPE_STORE))) == 0))
- return 1;
- }
- hist_lim--;
- hist_ent = (hist_ent - 1) & (ULTRA_NUM_HIST - 1);
- }
-
- /* No conflicts, safe to dispatch. */
+ if ((1 << sparc_cpu) &
+ ((1 << PROCESSOR_ULTRASPARC) | (1 << PROCESSOR_CYPRESS) |
+ (1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
+ (1 << PROCESSOR_SPARCLITE86X) | (1 << PROCESSOR_TSC701) |
+ (1 << PROCESSOR_ULTRASPARC3)))
+ return 1;
return 0;
}
-/* Find an instruction in LIST which has one of the
- type attributes enumerated in TYPE_MASK. START
- says where to begin the search.
-
- NOTE: This scheme depends upon the fact that we
- have less than 32 distinct type attributes. */
-
-static int ultra_types_avail;
-
-static rtx *
-ultra_find_type (type_mask, list, start)
- int type_mask;
- rtx *list;
- int start;
-{
- int i;
-
- /* Short circuit if no such insn exists in the ready
- at the moment. */
- if ((type_mask & ultra_types_avail) == 0)
- return 0;
-
- for (i = start; i >= 0; i--)
- {
- rtx insn = list[i];
-
- if (recog_memoized (insn) >= 0
- && (TMASK(get_attr_type (insn)) & type_mask))
- {
- enum machine_mode fpmode = SFmode;
- rtx pat = 0;
- int slot;
- int check_depend = 0;
- int check_fpmode_conflict = 0;
-
- if (GET_CODE (insn) == INSN
- && (pat = PATTERN(insn)) != 0
- && GET_CODE (pat) == SET
- && !(type_mask & (TMASK (TYPE_STORE) |
- TMASK (TYPE_FPSTORE))))
- {
- check_depend = 1;
- if (GET_MODE (SET_DEST (pat)) == SFmode
- || GET_MODE (SET_DEST (pat)) == DFmode)
- {
- fpmode = GET_MODE (SET_DEST (pat));
- check_fpmode_conflict = 1;
- }
- }
-
- slot = 4;
- while(--slot >= 0)
- {
- rtx slot_insn = ultra_pipe.group[slot];
- rtx slot_pat;
-
- /* Already issued, bad dependency, or FPU
- mode conflict. */
- if (slot_insn != 0
- && (slot_pat = PATTERN (slot_insn)) != 0
- && ((insn == slot_insn)
- || (check_depend == 1
- && GET_CODE (slot_insn) == INSN
- && GET_CODE (slot_pat) == SET
- && ((GET_CODE (SET_DEST (slot_pat)) == REG
- && GET_CODE (SET_SRC (pat)) == REG
- && REGNO (SET_DEST (slot_pat)) ==
- REGNO (SET_SRC (pat)))
- || (GET_CODE (SET_DEST (slot_pat)) == SUBREG
- && GET_CODE (SET_SRC (pat)) == SUBREG
- && REGNO (SUBREG_REG (SET_DEST (slot_pat))) ==
- REGNO (SUBREG_REG (SET_SRC (pat)))
- && SUBREG_BYTE (SET_DEST (slot_pat)) ==
- SUBREG_BYTE (SET_SRC (pat)))))
- || (check_fpmode_conflict == 1
- && GET_CODE (slot_insn) == INSN
- && GET_CODE (slot_pat) == SET
- && (GET_MODE (SET_DEST (slot_pat)) == SFmode
- || GET_MODE (SET_DEST (slot_pat)) == DFmode)
- && GET_MODE (SET_DEST (slot_pat)) != fpmode)))
- goto next;
- }
-
- /* Check for peculiar result availability and dispatch
- interference situations. */
- if (pat != 0
- && ultra_cycles_elapsed > 0)
- {
- rtx link;
-
- for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
- {
- rtx link_insn = XEXP (link, 0);
- if (GET_CODE (link_insn) == INSN
- && recog_memoized (link_insn) >= 0
- && (TMASK (get_attr_type (link_insn)) &
- (TMASK (TYPE_CMOVE) | TMASK (TYPE_FPCMOVE)))
- && ! ultra_cmove_results_ready_p (link_insn))
- goto next;
- }
-
- if (check_fpmode_conflict
- && ultra_fpmode_conflict_exists (fpmode))
- goto next;
- }
-
- return &list[i];
- }
- next:
- ;
- }
+static int
+sparc_use_sched_lookahead ()
+{
+ if (sparc_cpu == PROCESSOR_ULTRASPARC
+ || sparc_cpu == PROCESSOR_ULTRASPARC3)
+ return 4;
+ if ((1 << sparc_cpu) &
+ ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
+ (1 << PROCESSOR_SPARCLITE86X)))
+ return 3;
return 0;
}
-static void
-ultra_build_types_avail (ready, n_ready)
- rtx *ready;
- int n_ready;
+static rtx
+sparc_cycle_display (clock, last)
+ int clock;
+ rtx last;
{
- int i = n_ready - 1;
-
- ultra_types_avail = 0;
- while(i >= 0)
- {
- rtx insn = ready[i];
+ if (reload_completed)
+ return emit_insn_after (gen_cycle_display (GEN_INT (clock)), last);
+ else
+ return last;
+}
- if (recog_memoized (insn) >= 0)
- ultra_types_avail |= TMASK (get_attr_type (insn));
+/* Return nonzero if the dependency between OUT_INSN and
+   IN_INSN (a store) is on the store data, not on the address
+   operand(s) of the store; return zero otherwise.  */
- i -= 1;
- }
-}
+int
+ultrasparc_store_bypass_p (out_insn, in_insn)
+ rtx out_insn, in_insn;
+{
+ rtx out_pat, in_pat;
+ unsigned int regno;
-/* Place insn pointed to my IP into the pipeline.
- Make element THIS of READY be that insn if it
- is not already. TYPE indicates the pipeline class
- this insn falls into. */
-static void
-ultra_schedule_insn (ip, ready, this, type)
- rtx *ip;
- rtx *ready;
- int this;
- enum ultra_code type;
-{
- int pipe_slot;
- char mask = ultra_pipe.free_slot_mask;
- rtx temp;
+ if (recog_memoized (in_insn) < 0)
+ return 0;
- /* Obtain free slot. */
- for (pipe_slot = 0; pipe_slot < 4; pipe_slot++)
- if ((mask & (1 << pipe_slot)) != 0)
- break;
- if (pipe_slot == 4)
+ if (get_attr_type (in_insn) != TYPE_STORE
+ && get_attr_type (in_insn) != TYPE_FPSTORE)
abort ();
- /* In it goes, and it hasn't been committed yet. */
- ultra_pipe.group[pipe_slot] = *ip;
- ultra_pipe.codes[pipe_slot] = type;
- ultra_pipe.contents[type] = 1;
- if (UMASK (type) &
- (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
- ultra_pipe.num_ieu_insns += 1;
+ out_pat = PATTERN (out_insn);
+ in_pat = PATTERN (in_insn);
- ultra_pipe.free_slot_mask = (mask & ~(1 << pipe_slot));
- ultra_pipe.group_size += 1;
- ultra_pipe.commit[pipe_slot] = 0;
+ if ((GET_CODE (out_pat) != SET
+ && GET_CODE (out_pat) != PARALLEL)
+ || GET_CODE (in_pat) != SET)
+ abort ();
- /* Update ready list. */
- temp = *ip;
- while (ip != &ready[this])
+ if (GET_CODE (SET_SRC (in_pat)) == REG)
{
- ip[0] = ip[1];
- ++ip;
+ regno = REGNO (SET_SRC (in_pat));
}
- *ip = temp;
-}
-
-/* Advance to the next pipeline group. */
-static void
-ultra_flush_pipeline ()
-{
- ultra_cur_hist = (ultra_cur_hist + 1) & (ULTRA_NUM_HIST - 1);
- ultra_cycles_elapsed += 1;
- memset ((char *) &ultra_pipe, 0, sizeof ultra_pipe);
- ultra_pipe.free_slot_mask = 0xf;
-}
-
-/* Init our data structures for this current block. */
-static void
-ultrasparc_sched_init ()
-{
- memset ((char *) ultra_pipe_hist, 0, sizeof ultra_pipe_hist);
- ultra_cur_hist = 0;
- ultra_cycles_elapsed = 0;
- ultra_pipe.free_slot_mask = 0xf;
-}
-
-static void
-sparc_sched_init (dump, sched_verbose, max_ready)
- FILE *dump ATTRIBUTE_UNUSED;
- int sched_verbose ATTRIBUTE_UNUSED;
- int max_ready ATTRIBUTE_UNUSED;
-{
- if (sparc_cpu == PROCESSOR_ULTRASPARC)
- ultrasparc_sched_init ();
-}
-
-/* INSN has been scheduled, update pipeline commit state
- and return how many instructions are still to be
- scheduled in this group. */
-static int
-ultrasparc_variable_issue (insn)
- rtx insn;
-{
- struct ultrasparc_pipeline_state *up = &ultra_pipe;
- int i, left_to_fire;
-
- left_to_fire = 0;
- for (i = 0; i < 4; i++)
+ else if (GET_CODE (SET_SRC (in_pat)) == SUBREG)
{
- if (up->group[i] == 0)
- continue;
-
- if (up->group[i] == insn)
- {
- up->commit[i] = 1;
- }
- else if (! up->commit[i])
- left_to_fire++;
+ regno = REGNO (SUBREG_REG (SET_SRC (in_pat)));
}
-
- return left_to_fire;
-}
-
-static int
-sparc_variable_issue (dump, sched_verbose, insn, cim)
- FILE *dump ATTRIBUTE_UNUSED;
- int sched_verbose ATTRIBUTE_UNUSED;
- rtx insn;
- int cim;
-{
- if (sparc_cpu == PROCESSOR_ULTRASPARC)
- return ultrasparc_variable_issue (insn);
else
- return cim - 1;
-}
-
-/* In actual_hazard_this_instance, we may have yanked some
- instructions from the ready list due to conflict cost
- adjustments. If so, and such an insn was in our pipeline
- group, remove it and update state. */
-static void
-ultra_rescan_pipeline_state (ready, n_ready)
- rtx *ready;
- int n_ready;
-{
- struct ultrasparc_pipeline_state *up = &ultra_pipe;
- int i;
+ return 0;
- for (i = 0; i < 4; i++)
+ if (GET_CODE (out_pat) == PARALLEL)
{
- rtx insn = up->group[i];
- int j;
+ int i;
- if (! insn)
- continue;
+ for (i = 0; i < XVECLEN (out_pat, 0); i++)
+ {
+ rtx exp = XVECEXP (out_pat, 0, i);
- /* If it has been committed, then it was removed from
- the ready list because it was actually scheduled,
- and that is not the case we are searching for here. */
- if (up->commit[i] != 0)
- continue;
+ if (GET_CODE (exp) != SET)
+ return 0;
- for (j = n_ready - 1; j >= 0; j--)
- if (ready[j] == insn)
- break;
+ if (GET_CODE (SET_DEST (exp)) == REG
+ && regno == REGNO (SET_DEST (exp)))
+ return 1;
- /* If we didn't find it, toss it. */
- if (j < 0)
- {
- enum ultra_code ucode = up->codes[i];
-
- up->group[i] = 0;
- up->codes[i] = NONE;
- up->contents[ucode] = 0;
- if (UMASK (ucode) &
- (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
- up->num_ieu_insns -= 1;
-
- up->free_slot_mask |= (1 << i);
- up->group_size -= 1;
- up->commit[i] = 0;
+ if (GET_CODE (SET_DEST (exp)) == SUBREG
+ && regno == REGNO (SUBREG_REG (SET_DEST (exp))))
+ return 1;
}
}
-}
-
-static void
-ultrasparc_sched_reorder (dump, sched_verbose, ready, n_ready)
- FILE *dump;
- int sched_verbose;
- rtx *ready;
- int n_ready;
-{
- struct ultrasparc_pipeline_state *up = &ultra_pipe;
- int i, this_insn;
-
- if (sched_verbose)
+ else if (GET_CODE (SET_DEST (out_pat)) == REG)
{
- int n;
-
- fprintf (dump, "\n;;\tUltraSPARC Looking at [");
- for (n = n_ready - 1; n >= 0; n--)
- {
- rtx insn = ready[n];
- enum ultra_code ucode;
-
- if (recog_memoized (insn) < 0)
- continue;
- ucode = ultra_code_from_mask (TMASK (get_attr_type (insn)));
- if (n != 0)
- fprintf (dump, "%s(%d) ",
- ultra_code_names[ucode],
- INSN_UID (insn));
- else
- fprintf (dump, "%s(%d)",
- ultra_code_names[ucode],
- INSN_UID (insn));
- }
- fprintf (dump, "]\n");
+ return regno == REGNO (SET_DEST (out_pat));
}
-
- this_insn = n_ready - 1;
-
- /* Skip over junk we don't understand. */
- while ((this_insn >= 0)
- && recog_memoized (ready[this_insn]) < 0)
- this_insn--;
-
- ultra_build_types_avail (ready, this_insn + 1);
-
- while (this_insn >= 0) {
- int old_group_size = up->group_size;
-
- if (up->group_size != 0)
- {
- int num_committed;
-
- num_committed = (up->commit[0] + up->commit[1] +
- up->commit[2] + up->commit[3]);
- /* If nothing has been commited from our group, or all of
- them have. Clear out the (current cycle's) pipeline
- state and start afresh. */
- if (num_committed == 0
- || num_committed == up->group_size)
- {
- ultra_flush_pipeline ();
- up = &ultra_pipe;
- old_group_size = 0;
- }
- else
- {
- /* OK, some ready list insns got requeued and thus removed
- from the ready list. Account for this fact. */
- ultra_rescan_pipeline_state (ready, n_ready);
-
- /* Something "changed", make this look like a newly
- formed group so the code at the end of the loop
- knows that progress was in fact made. */
- if (up->group_size != old_group_size)
- old_group_size = 0;
- }
- }
-
- if (up->group_size == 0)
- {
- /* If the pipeline is (still) empty and we have any single
- group insns, get them out now as this is a good time. */
- rtx *ip = ultra_find_type ((TMASK (TYPE_RETURN) | TMASK (TYPE_IDIV) |
- TMASK (TYPE_IMUL) | TMASK (TYPE_CMOVE) |
- TMASK (TYPE_MULTI) | TMASK (TYPE_MISC)),
- ready, this_insn);
- if (ip)
- {
- ultra_schedule_insn (ip, ready, this_insn, SINGLE);
- break;
- }
-
- /* If we are not in the process of emptying out the pipe, try to
- obtain an instruction which must be the first in it's group. */
- ip = ultra_find_type ((TMASK (TYPE_CALL) |
- TMASK (TYPE_SIBCALL) |
- TMASK (TYPE_CALL_NO_DELAY_SLOT) |
- TMASK (TYPE_UNCOND_BRANCH)),
- ready, this_insn);
- if (ip)
- {
- ultra_schedule_insn (ip, ready, this_insn, IEU1);
- this_insn--;
- }
- else if ((ip = ultra_find_type ((TMASK (TYPE_FPDIVS) |
- TMASK (TYPE_FPDIVD) |
- TMASK (TYPE_FPSQRTS) |
- TMASK (TYPE_FPSQRTD)),
- ready, this_insn)) != 0)
- {
- ultra_schedule_insn (ip, ready, this_insn, FPM);
- this_insn--;
- }
- }
-
- /* Try to fill the integer pipeline. First, look for an IEU0 specific
- operation. We can't do more IEU operations if the first 3 slots are
- all full or we have dispatched two IEU insns already. */
- if ((up->free_slot_mask & 0x7) != 0
- && up->num_ieu_insns < 2
- && up->contents[IEU0] == 0
- && up->contents[IEUN] == 0)
- {
- rtx *ip = ultra_find_type (TMASK(TYPE_SHIFT), ready, this_insn);
- if (ip)
- {
- ultra_schedule_insn (ip, ready, this_insn, IEU0);
- this_insn--;
- }
- }
-
- /* If we can, try to find an IEU1 specific or an unnamed
- IEU instruction. */
- if ((up->free_slot_mask & 0x7) != 0
- && up->num_ieu_insns < 2)
- {
- rtx *ip = ultra_find_type ((TMASK (TYPE_IALU) |
- (up->contents[IEU1] == 0 ? TMASK (TYPE_COMPARE) : 0)),
- ready, this_insn);
- if (ip)
- {
- rtx insn = *ip;
-
- ultra_schedule_insn (ip, ready, this_insn,
- (!up->contents[IEU1]
- && get_attr_type (insn) == TYPE_COMPARE)
- ? IEU1 : IEUN);
- this_insn--;
- }
- }
-
- /* If only one IEU insn has been found, try to find another unnamed
- IEU operation or an IEU1 specific one. */
- if ((up->free_slot_mask & 0x7) != 0
- && up->num_ieu_insns < 2)
- {
- rtx *ip;
- int tmask = TMASK (TYPE_IALU);
-
- if (!up->contents[IEU1])
- tmask |= TMASK (TYPE_COMPARE);
- ip = ultra_find_type (tmask, ready, this_insn);
- if (ip)
- {
- rtx insn = *ip;
-
- ultra_schedule_insn (ip, ready, this_insn,
- (!up->contents[IEU1]
- && get_attr_type (insn) == TYPE_COMPARE)
- ? IEU1 : IEUN);
- this_insn--;
- }
- }
-
- /* Try for a load or store, but such an insn can only be issued
- if it is within' one of the first 3 slots. */
- if ((up->free_slot_mask & 0x7) != 0
- && up->contents[LSU] == 0)
- {
- rtx *ip = ultra_find_type ((TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
- TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
- TMASK (TYPE_FPSTORE)), ready, this_insn);
- if (ip)
- {
- ultra_schedule_insn (ip, ready, this_insn, LSU);
- this_insn--;
- }
- }
-
- /* Now find FPU operations, first FPM class. But not divisions or
- square-roots because those will break the group up. Unlike all
- the previous types, these can go in any slot. */
- if (up->free_slot_mask != 0
- && up->contents[FPM] == 0)
- {
- rtx *ip = ultra_find_type (TMASK (TYPE_FPMUL), ready, this_insn);
- if (ip)
- {
- ultra_schedule_insn (ip, ready, this_insn, FPM);
- this_insn--;
- }
- }
-
- /* Continue on with FPA class if we have not filled the group already. */
- if (up->free_slot_mask != 0
- && up->contents[FPA] == 0)
- {
- rtx *ip = ultra_find_type ((TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
- TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)),
- ready, this_insn);
- if (ip)
- {
- ultra_schedule_insn (ip, ready, this_insn, FPA);
- this_insn--;
- }
- }
-
- /* Finally, maybe stick a branch in here. */
- if (up->free_slot_mask != 0
- && up->contents[CTI] == 0)
- {
- rtx *ip = ultra_find_type (TMASK (TYPE_BRANCH), ready, this_insn);
-
- /* Try to slip in a branch only if it is one of the
- next 2 in the ready list. */
- if (ip && ((&ready[this_insn] - ip) < 2))
- {
- ultra_schedule_insn (ip, ready, this_insn, CTI);
- this_insn--;
- }
- }
-
- up->group_size = 0;
- for (i = 0; i < 4; i++)
- if ((up->free_slot_mask & (1 << i)) == 0)
- up->group_size++;
-
- /* See if we made any progress... */
- if (old_group_size != up->group_size)
- break;
-
- /* Clean out the (current cycle's) pipeline state
- and try once more. If we placed no instructions
- into the pipeline at all, it means a real hard
- conflict exists with some earlier issued instruction
- so we must advance to the next cycle to clear it up. */
- if (up->group_size == 0)
- {
- ultra_flush_pipeline ();
- up = &ultra_pipe;
- }
- else
- {
- memset ((char *) &ultra_pipe, 0, sizeof ultra_pipe);
- ultra_pipe.free_slot_mask = 0xf;
- }
- }
-
- if (sched_verbose)
+ else if (GET_CODE (SET_DEST (out_pat)) == SUBREG)
{
- int n, gsize;
-
- fprintf (dump, ";;\tUltraSPARC Launched [");
- gsize = up->group_size;
- for (n = 0; n < 4; n++)
- {
- rtx insn = up->group[n];
-
- if (! insn)
- continue;
-
- gsize -= 1;
- if (gsize != 0)
- fprintf (dump, "%s(%d) ",
- ultra_code_names[up->codes[n]],
- INSN_UID (insn));
- else
- fprintf (dump, "%s(%d)",
- ultra_code_names[up->codes[n]],
- INSN_UID (insn));
- }
- fprintf (dump, "]\n");
+ return regno == REGNO (SUBREG_REG (SET_DEST (out_pat)));
}
-}
-static int
-sparc_sched_reorder (dump, sched_verbose, ready, n_readyp, clock)
- FILE *dump;
- int sched_verbose;
- rtx *ready;
- int *n_readyp;
- int clock ATTRIBUTE_UNUSED;
-{
- if (sparc_cpu == PROCESSOR_ULTRASPARC)
- ultrasparc_sched_reorder (dump, sched_verbose, ready, *n_readyp);
- return sparc_issue_rate ();
+ return 0;
}
-static int
+static int
sparc_issue_rate ()
{
switch (sparc_cpu)
{
- default:
- return 1;
- case PROCESSOR_V9:
+ default:
+ return 1;
+ case PROCESSOR_V9:
/* Assume V9 processors are capable of at least dual-issue. */
return 2;
- case PROCESSOR_SUPERSPARC:
- return 3;
+ case PROCESSOR_SUPERSPARC:
+ return 3;
case PROCESSOR_HYPERSPARC:
case PROCESSOR_SPARCLITE86X:
return 2;
- case PROCESSOR_ULTRASPARC:
- return 4;
+ case PROCESSOR_ULTRASPARC:
+ case PROCESSOR_ULTRASPARC3:
+ return 4;
}
}
emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
}
\f
-/* Mark ARG, which is really a struct ultrasparc_pipline_state *, for
- GC. */
-
-static void
-mark_ultrasparc_pipeline_state (arg)
- void *arg;
-{
- struct ultrasparc_pipeline_state *ups;
- size_t i;
-
- ups = (struct ultrasparc_pipeline_state *) arg;
- for (i = 0; i < ARRAY_SIZE (ups->group); ++i)
- ggc_mark_rtx (ups->group[i]);
-}
-
/* Called to register all of our global variables with the garbage
collector. */
ggc_add_rtx_root (&get_pc_symbol, 1);
ggc_add_rtx_root (&sparc_addr_diff_list, 1);
ggc_add_rtx_root (&sparc_addr_list, 1);
- ggc_add_root (ultra_pipe_hist, ARRAY_SIZE (ultra_pipe_hist),
- sizeof (ultra_pipe_hist[0]), &mark_ultrasparc_pipeline_state);
}
#ifdef OBJECT_FORMAT_ELF
#define TARGET_CPU_sparcv9 7 /* alias */
#define TARGET_CPU_sparc64 7 /* alias */
#define TARGET_CPU_ultrasparc 8
+#define TARGET_CPU_ultrasparc3 9
#if TARGET_CPU_DEFAULT == TARGET_CPU_v9 \
- || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc
+ || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc \
+ || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3
#define CPP_CPU32_DEFAULT_SPEC ""
#define ASM_CPU32_DEFAULT_SPEC ""
#define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__"
#define ASM_CPU64_DEFAULT_SPEC "-Av9a"
#endif
+#if TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3
+#define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__"
+#define ASM_CPU64_DEFAULT_SPEC "-Av9b"
+#endif
#else
%{mcpu=sparclite86x:-D__sparclite86x__} \
%{mcpu=v9:-D__sparc_v9__} \
%{mcpu=ultrasparc:-D__sparc_v9__} \
+%{mcpu=ultrasparc3:-D__sparc_v9__} \
%{!mcpu*:%{!mcypress:%{!msparclite:%{!mf930:%{!mf934:%{!mv8:%{!msupersparc:%(cpp_cpu_default)}}}}}}} \
"
%{mv8plus:-Av8plus} \
%{mcpu=v9:-Av9} \
%{mcpu=ultrasparc:%{!mv8plus:-Av9a}} \
+%{mcpu=ultrasparc3:%{!mv8plus:-Av9b}} \
%{!mcpu*:%{!mcypress:%{!msparclite:%{!mf930:%{!mf934:%{!mv8:%{!msupersparc:%(asm_cpu_default)}}}}}}} \
"
PROCESSOR_SPARCLET,
PROCESSOR_TSC701,
PROCESSOR_V9,
- PROCESSOR_ULTRASPARC
+ PROCESSOR_ULTRASPARC,
+ PROCESSOR_ULTRASPARC3
};
/* This is set from -m{cpu,tune}=xxx. */
(((FP_REG_CLASS_P (CLASS1) && GENERAL_OR_I64 (CLASS2)) \
|| (GENERAL_OR_I64 (CLASS1) && FP_REG_CLASS_P (CLASS2)) \
|| (CLASS1) == FPCC_REGS || (CLASS2) == FPCC_REGS) \
- ? (sparc_cpu == PROCESSOR_ULTRASPARC ? 12 : 6) : 2)
+ ? ((sparc_cpu == PROCESSOR_ULTRASPARC \
+ || sparc_cpu == PROCESSOR_ULTRASPARC3) ? 12 : 6) : 2)
/* Provide the cost of a branch. For pre-v9 processors we use
a value of 3 to take into account the potential annulling of
if (sparc_cpu == PROCESSOR_ULTRASPARC) \
return (GET_MODE (X) == DImode ? \
COSTS_N_INSNS (34) : COSTS_N_INSNS (19)); \
+ if (sparc_cpu == PROCESSOR_ULTRASPARC3) \
+ return COSTS_N_INSNS (6); \
return TARGET_HARD_MUL ? COSTS_N_INSNS (5) : COSTS_N_INSNS (25); \
case DIV: \
case UDIV: \
if (sparc_cpu == PROCESSOR_ULTRASPARC) \
return (GET_MODE (X) == DImode ? \
COSTS_N_INSNS (68) : COSTS_N_INSNS (37)); \
+ if (sparc_cpu == PROCESSOR_ULTRASPARC3) \
+ return (GET_MODE (X) == DImode ? \
+ COSTS_N_INSNS (71) : COSTS_N_INSNS (40)); \
return COSTS_N_INSNS (25); \
/* Make FLOAT and FIX more expensive than CONST_DOUBLE,\
so that cse will favor the latter. */ \
;; 16 embmedany_textlo
;; 18 sethm
;; 19 setlo
+;; 20 cycle_display
;;
;; UNSPEC_VOLATILE: 0 blockage
;; 1 flush_register_windows
;; Attribute for cpu type.
;; These must match the values for enum processor_type in sparc.h.
-(define_attr "cpu" "v7,cypress,v8,supersparc,sparclite,f930,f934,hypersparc,sparclite86x,sparclet,tsc701,v9,ultrasparc"
+(define_attr "cpu" "v7,cypress,v8,supersparc,sparclite,f930,f934,hypersparc,sparclite86x,sparclet,tsc701,v9,ultrasparc,ultrasparc3"
(const (symbol_ref "sparc_cpu_attr")))
;; Attribute for the instruction set.
;; Insn type.
-;; If you add any new type here, please update ultrasparc_sched_reorder too.
(define_attr "type"
- "ialu,compare,shift,load,sload,store,uncond_branch,branch,call,sibcall,call_no_delay_slot,return,imul,idiv,fpload,fpstore,fp,fpmove,fpcmove,fpcmp,fpmul,fpdivs,fpdivd,fpsqrts,fpsqrtd,cmove,multi,misc"
+ "ialu,compare,shift,load,sload,store,uncond_branch,branch,call,sibcall,call_no_delay_slot,return,imul,idiv,fpload,fpstore,fp,fpmove,fpcmove,fpcrmove,fpcmp,fpmul,fpdivs,fpdivd,fpsqrts,fpsqrtd,cmove,multi,misc"
(const_string "ialu"))
;; true if branch/call has empty delay slot and will emit a nop in it
;; FP precision.
(define_attr "fptype" "single,double" (const_string "single"))
+;; UltraSPARC-III integer load type.
+(define_attr "us3load_type" "2cycle,3cycle" (const_string "2cycle"))
+
(define_asm_attributes
[(set_attr "length" "2")
(set_attr "type" "multi")])
[(eq_attr "in_uncond_branch_delay" "true")
(nil) (nil)])
-;; Function units of the SPARC
-
-;; (define_function_unit {name} {num-units} {n-users} {test}
-;; {ready-delay} {issue-delay} [{conflict-list}])
+;; DFA scheduling on the SPARC
-;; The integer ALU.
-;; (Noted only for documentation; units that take one cycle do not need to
-;; be specified.)
+(define_automaton "cypress_0,cypress_1,supersparc_0,supersparc_1,hypersparc_0,hypersparc_1,sparclet,ultrasparc_0,ultrasparc_1,ultrasparc3_0,ultrasparc3_1")
-;; On the sparclite, integer multiply takes 1, 3, or 5 cycles depending on
-;; the inputs.
+;; Cypress scheduling
-;; ---- cypress CY7C602 scheduling:
-;; Memory with load-delay of 1 (i.e., 2 cycle load).
+(define_cpu_unit "cyp_memory, cyp_fpalu" "cypress_0")
+(define_cpu_unit "cyp_fpmds" "cypress_1")
-(define_function_unit "memory" 1 0
+(define_insn_reservation "cyp_load" 2
(and (eq_attr "cpu" "cypress")
(eq_attr "type" "load,sload,fpload"))
- 2 2)
-
-;; SPARC has two floating-point units: the FP ALU,
-;; and the FP MUL/DIV/SQRT unit.
-;; Instruction timings on the CY7C602 are as follows
-;; FABSs 4
-;; FADDs/d 5/5
-;; FCMPs/d 4/4
-;; FDIVs/d 23/37
-;; FMOVs 4
-;; FMULs/d 5/7
-;; FNEGs 4
-;; FSQRTs/d 34/63
-;; FSUBs/d 5/5
-;; FdTOi/s 5/5
-;; FsTOi/d 5/5
-;; FiTOs/d 9/5
-
-;; The CY7C602 can only support 2 fp isnsn simultaneously.
-;; More insns cause the chip to stall.
-
-(define_function_unit "fp_alu" 1 0
+ "cyp_memory, nothing")
+
+(define_insn_reservation "cyp_fp_alu" 5
(and (eq_attr "cpu" "cypress")
(eq_attr "type" "fp,fpmove"))
- 5 5)
+ "cyp_fpalu, nothing*3")
-(define_function_unit "fp_mds" 1 0
+(define_insn_reservation "cyp_fp_mult" 7
(and (eq_attr "cpu" "cypress")
(eq_attr "type" "fpmul"))
- 7 7)
+ "cyp_fpmds, nothing*5")
-(define_function_unit "fp_mds" 1 0
+(define_insn_reservation "cyp_fp_div" 37
(and (eq_attr "cpu" "cypress")
(eq_attr "type" "fpdivs,fpdivd"))
- 37 37)
+ "cyp_fpmds, nothing*35")
-(define_function_unit "fp_mds" 1 0
+(define_insn_reservation "cyp_fp_sqrt" 63
(and (eq_attr "cpu" "cypress")
(eq_attr "type" "fpsqrts,fpsqrtd"))
- 63 63)
+ "cyp_fpmds, nothing*61")
+
+;; SuperSPARC scheduling
+
+(define_cpu_unit "ss_memory, ss_shift, ss_iwport0, ss_iwport1" "supersparc_0")
+(define_cpu_unit "ss_fpalu" "supersparc_0")
+(define_cpu_unit "ss_fpmds" "supersparc_1")
-;; ----- The TMS390Z55 scheduling
-;; The Supersparc can issue 1 - 3 insns per cycle: up to two integer,
-;; one ld/st, one fp.
-;; Memory delivers its result in one cycle to IU, zero cycles to FP
+(define_reservation "ss_iwport" "(ss_iwport0 | ss_iwport1)")
-(define_function_unit "memory" 1 0
+(define_insn_reservation "ss_iuload" 1
(and (eq_attr "cpu" "supersparc")
(eq_attr "type" "load,sload"))
- 1 1)
+ "ss_memory")
-(define_function_unit "memory" 1 0
+;; Ok, fpu loads deliver the result in zero cycles. But we
+;; have to show the ss_memory reservation somehow, thus...
+(define_insn_reservation "ss_fpload" 0
(and (eq_attr "cpu" "supersparc")
(eq_attr "type" "fpload"))
- 0 1)
+ "ss_memory")
-(define_function_unit "memory" 1 0
+(define_bypass 0 "ss_fpload" "ss_fp_alu,ss_fp_mult,ss_fp_divs,ss_fp_divd,ss_fp_sqrt")
+
+(define_insn_reservation "ss_store" 1
(and (eq_attr "cpu" "supersparc")
(eq_attr "type" "store,fpstore"))
- 1 1)
+ "ss_memory")
-(define_function_unit "shift" 1 0
+(define_insn_reservation "ss_ialu_shift" 1
(and (eq_attr "cpu" "supersparc")
(eq_attr "type" "shift"))
- 1 1)
-
-;; There are only two write ports to the integer register file
-;; A store also uses a write port
+ "ss_shift + ss_iwport")
-(define_function_unit "iwport" 2 0
+(define_insn_reservation "ss_ialu_any" 1
(and (eq_attr "cpu" "supersparc")
(eq_attr "type" "load,sload,store,shift,ialu"))
- 1 1)
-
-;; Timings; throughput/latency
-;; FADD 1/3 add/sub, format conv, compar, abs, neg
-;; FMUL 1/3
-;; FDIVs 4/6
-;; FDIVd 7/9
-;; FSQRTs 6/8
-;; FSQRTd 10/12
-;; IMUL 4/4
-
-(define_function_unit "fp_alu" 1 0
+ "ss_iwport")
+
+(define_insn_reservation "ss_fp_alu" 3
(and (eq_attr "cpu" "supersparc")
(eq_attr "type" "fp,fpmove,fpcmp"))
- 3 1)
+ "ss_fpalu, nothing*2")
-(define_function_unit "fp_mds" 1 0
+(define_insn_reservation "ss_fp_mult" 3
(and (eq_attr "cpu" "supersparc")
(eq_attr "type" "fpmul"))
- 3 1)
+ "ss_fpmds, nothing*2")
-(define_function_unit "fp_mds" 1 0
+(define_insn_reservation "ss_fp_divs" 6
(and (eq_attr "cpu" "supersparc")
(eq_attr "type" "fpdivs"))
- 6 4)
+ "ss_fpmds*4, nothing*2")
-(define_function_unit "fp_mds" 1 0
+(define_insn_reservation "ss_fp_divd" 9
(and (eq_attr "cpu" "supersparc")
(eq_attr "type" "fpdivd"))
- 9 7)
+ "ss_fpmds*7, nothing*2")
-(define_function_unit "fp_mds" 1 0
+(define_insn_reservation "ss_fp_sqrt" 12
(and (eq_attr "cpu" "supersparc")
(eq_attr "type" "fpsqrts,fpsqrtd"))
- 12 10)
+ "ss_fpmds*10, nothing*2")
-(define_function_unit "fp_mds" 1 0
+(define_insn_reservation "ss_imul" 4
(and (eq_attr "cpu" "supersparc")
(eq_attr "type" "imul"))
- 4 4)
+ "ss_fpmds*4")
-;; ----- hypersparc/sparclite86x scheduling
-;; The Hypersparc can issue 1 - 2 insns per cycle. The dual issue cases are:
-;; L-Ld/St I-Int F-Float B-Branch LI/LF/LB/II/IF/IB/FF/FB
-;; II/FF case is only when loading a 32 bit hi/lo constant
-;; Single issue insns include call, jmpl, u/smul, u/sdiv, lda, sta, fcmp
-;; Memory delivers its result in one cycle to IU
+;; HyperSPARC/sparclite86x scheduling
-(define_function_unit "memory" 1 0
+(define_cpu_unit "hs_memory,hs_branch,hs_shift,hs_fpalu" "hypersparc_0")
+(define_cpu_unit "hs_fpmds" "hypersparc_1")
+
+(define_insn_reservation "hs_load" 1
(and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
(eq_attr "type" "load,sload,fpload"))
- 1 1)
+ "hs_memory")
-(define_function_unit "memory" 1 0
+(define_insn_reservation "hs_store" 2
(and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
(eq_attr "type" "store,fpstore"))
- 2 1)
+ "hs_memory, nothing")
-(define_function_unit "sparclite86x_branch" 1 0
+(define_insn_reservation "hs_slbranch" 1
(and (eq_attr "cpu" "sparclite86x")
(eq_attr "type" "branch"))
- 1 1)
+ "hs_branch")
-;; integer multiply insns
-(define_function_unit "sparclite86x_shift" 1 0
+(define_insn_reservation "hs_slshift" 1
(and (eq_attr "cpu" "sparclite86x")
(eq_attr "type" "shift"))
- 1 1)
+ "hs_shift")
-(define_function_unit "fp_alu" 1 0
+(define_insn_reservation "hs_fp_alu" 1
(and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
(eq_attr "type" "fp,fpmove,fpcmp"))
- 1 1)
+ "hs_fpalu")
-(define_function_unit "fp_mds" 1 0
+(define_insn_reservation "hs_fp_mult" 1
(and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
(eq_attr "type" "fpmul"))
- 1 1)
+ "hs_fpmds")
-(define_function_unit "fp_mds" 1 0
+(define_insn_reservation "hs_fp_divs" 8
(and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
(eq_attr "type" "fpdivs"))
- 8 6)
+ "hs_fpmds*6, nothing*2")
-(define_function_unit "fp_mds" 1 0
+(define_insn_reservation "hs_fp_divd" 12
(and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
(eq_attr "type" "fpdivd"))
- 12 10)
+ "hs_fpmds*10, nothing*2")
-(define_function_unit "fp_mds" 1 0
+(define_insn_reservation "hs_fp_sqrt" 17
(and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
(eq_attr "type" "fpsqrts,fpsqrtd"))
- 17 15)
+ "hs_fpmds*15, nothing*2")
-(define_function_unit "fp_mds" 1 0
+(define_insn_reservation "hs_imul" 17
(and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
(eq_attr "type" "imul"))
- 17 15)
+ "hs_fpmds*15, nothing*2")
-;; ----- sparclet tsc701 scheduling
-;; The tsc701 issues 1 insn per cycle.
-;; Results may be written back out of order.
+;; Sparclet tsc701 scheduling
-;; Loads take 2 extra cycles to complete and 4 can be buffered at a time.
+(define_cpu_unit "sl_load0,sl_load1,sl_load2,sl_load3" "sparclet")
+(define_cpu_unit "sl_store,sl_imul" "sparclet")
-(define_function_unit "tsc701_load" 4 1
- (and (eq_attr "cpu" "tsc701")
- (eq_attr "type" "load,sload"))
- 3 1)
+(define_reservation "sl_load_any" "(sl_load0 | sl_load1 | sl_load2 | sl_load3)")
+(define_reservation "sl_load_all" "(sl_load0 + sl_load1 + sl_load2 + sl_load3)")
-;; Stores take 2(?) extra cycles to complete.
-;; It is desirable to not have any memory operation in the following 2 cycles.
-;; (??? or 2 memory ops in the case of std).
+(define_insn_reservation "sl_ld" 3
+ (and (eq_attr "cpu" "tsc701")
+ (eq_attr "type" "load,sload"))
+ "sl_load_any, sl_load_any, sl_load_any")
-(define_function_unit "tsc701_store" 1 0
+(define_insn_reservation "sl_st" 3
(and (eq_attr "cpu" "tsc701")
(eq_attr "type" "store"))
- 3 3
- [(eq_attr "type" "load,sload,store")])
+ "(sl_store+sl_load_all)*3")
-;; The multiply unit has a latency of 5.
-(define_function_unit "tsc701_mul" 1 0
+(define_insn_reservation "sl_imul" 5
(and (eq_attr "cpu" "tsc701")
(eq_attr "type" "imul"))
- 5 5)
-
-;; ----- The UltraSPARC-1 scheduling
-;; UltraSPARC has two integer units. Shift instructions can only execute
-;; on IE0. Condition code setting instructions, call, and jmpl (including
-;; the ret and retl pseudo-instructions) can only execute on IE1.
-;; Branch on register uses IE1, but branch on condition code does not.
-;; Conditional moves take 2 cycles. No other instruction can issue in the
-;; same cycle as a conditional move.
-;; Multiply and divide take many cycles during which no other instructions
-;; can issue.
-;; Memory delivers its result in two cycles (except for signed loads,
-;; which take one cycle more). One memory instruction can be issued per
-;; cycle.
-
-(define_function_unit "memory" 1 0
+ "sl_imul*5")
+
+;; UltraSPARC-I/II scheduling
+
+;; FP divider and FP multiplier units (automaton ultrasparc_0).
+(define_cpu_unit "us1_fdivider,us1_fpm" "ultrasparc_0")
+(define_cpu_unit "us1_fpa,us1_load_writeback" "ultrasparc_1")
+(define_cpu_unit "us1_fps_0,us1_fps_1,us1_fpd_0,us1_fpd_1" "ultrasparc_1")
+(define_cpu_unit "us1_slot0,us1_slot1,us1_slot2,us1_slot3" "ultrasparc_1")
+(define_cpu_unit "us1_ieu0,us1_ieu1,us1_cti,us1_lsu" "ultrasparc_1")
+
+(define_reservation "us1_slot012" "(us1_slot0 | us1_slot1 | us1_slot2)")
+(define_reservation "us1_slotany" "(us1_slot0 | us1_slot1 | us1_slot2 | us1_slot3)")
+(define_reservation "us1_single_issue" "us1_slot0 + us1_slot1 + us1_slot2 + us1_slot3")
+
+(define_reservation "us1_fp_single" "(us1_fps_0 | us1_fps_1)")
+(define_reservation "us1_fp_double" "(us1_fpd_0 | us1_fpd_1)")
+;; This is a simplified representation of the issue at hand.
+;; For most cases, going from one FP precision type insn to another
+;; just breaks up the insn group. However for some cases, such
+;; a situation causes the second insn to stall 2 more cycles.
+(exclusion_set "us1_fps_0,us1_fps_1" "us1_fpd_0,us1_fpd_1")
+
+;; If we have to schedule an ieu1 specific instruction and we want
+;; to reserve the ieu0 unit as well, we must reserve it first. So for
+;; example we could not schedule this sequence:
+;; COMPARE IEU1
+;; IALU IEU0
+;; but we could schedule them together like this:
+;; IALU IEU0
+;; COMPARE IEU1
+;; This basically requires that ieu0 is reserved before ieu1 when
+;; it is required that both be reserved.
+(absence_set "us1_ieu0" "us1_ieu1")
+
+;; This defines the slotting order. Most IEU instructions can only
+;; execute in the first three slots, FPU and branches can go into
+;; any slot. We represent instructions which "break the group"
+;; as requiring reservation of us1_slot0.
+(absence_set "us1_slot0" "us1_slot1,us1_slot2,us1_slot3")
+(absence_set "us1_slot1" "us1_slot2,us1_slot3")
+(absence_set "us1_slot2" "us1_slot3")
+
+(define_insn_reservation "us1_simple_ieuN" 1
(and (eq_attr "cpu" "ultrasparc")
- (eq_attr "type" "load,fpload"))
- 2 1)
+ (eq_attr "type" "ialu"))
+ "(us1_ieu0 | us1_ieu1) + us1_slot012")
-(define_function_unit "memory" 1 0
+(define_insn_reservation "us1_simple_ieu0" 1
(and (eq_attr "cpu" "ultrasparc")
- (eq_attr "type" "sload"))
- 3 1)
+ (eq_attr "type" "shift"))
+ "us1_ieu0 + us1_slot012")
-(define_function_unit "memory" 1 0
+(define_insn_reservation "us1_simple_ieu1" 1
(and (eq_attr "cpu" "ultrasparc")
- (eq_attr "type" "store,fpstore"))
- 1 1)
+ (eq_attr "type" "compare"))
+ "us1_ieu1 + us1_slot012")
-(define_function_unit "ieuN" 2 0
+(define_insn_reservation "us1_cmove" 2
(and (eq_attr "cpu" "ultrasparc")
- (eq_attr "type" "ialu,shift,compare,call,sibcall,call_no_delay_slot,uncond_branch"))
- 1 1)
+ (eq_attr "type" "cmove"))
+ "us1_single_issue, nothing")
-(define_function_unit "ieu0" 1 0
+(define_insn_reservation "us1_imul" 1
(and (eq_attr "cpu" "ultrasparc")
- (eq_attr "type" "shift"))
- 1 1)
+ (eq_attr "type" "imul"))
+ "us1_single_issue")
-(define_function_unit "ieu0" 1 0
+(define_insn_reservation "us1_idiv" 1
(and (eq_attr "cpu" "ultrasparc")
- (eq_attr "type" "cmove"))
- 2 1)
+ (eq_attr "type" "idiv"))
+ "us1_single_issue")
-(define_function_unit "ieu1" 1 0
+;; For loads, the "delayed return mode" behavior of the chip
+;; is represented using the us1_load_writeback resource.
+(define_insn_reservation "us1_load" 2
(and (eq_attr "cpu" "ultrasparc")
- (eq_attr "type" "compare,call,sibcall,call_no_delay_slot,uncond_branch"))
- 1 1)
+ (eq_attr "type" "load,fpload"))
+ "us1_lsu + us1_slot012, us1_load_writeback")
+
+(define_insn_reservation "us1_load_signed" 3
+ (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "sload"))
+ "us1_lsu + us1_slot012, nothing, us1_load_writeback")
-(define_function_unit "cti" 1 0
+(define_insn_reservation "us1_store" 1
+ (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "store,fpstore"))
+ "us1_lsu + us1_slot012")
+
+(define_insn_reservation "us1_branch" 1
(and (eq_attr "cpu" "ultrasparc")
(eq_attr "type" "branch"))
- 1 1)
-
-;; Timings; throughput/latency
-;; FMOV 1/1 fmov, fabs, fneg
-;; FMOVcc 1/2
-;; FADD 1/3 add/sub, format conv, compar
-;; FMUL 1/3
-;; FDIVs 12/12
-;; FDIVd 22/22
-;; FSQRTs 12/12
-;; FSQRTd 22/22
-;; FCMP takes 1 cycle to branch, 2 cycles to conditional move.
-;;
-;; FDIV{s,d}/FSQRT{s,d} are given their own unit since they only
-;; use the FPM multiplier for final rounding 3 cycles before the
-;; end of their latency and we have no real way to model that.
-;;
-;; ??? This is really bogus because the timings really depend upon
-;; who uses the result. We should record who the user is with
-;; more descriptive 'type' attribute names and account for these
-;; issues in ultrasparc_adjust_cost.
+ "us1_cti + us1_slotany")
-(define_function_unit "fadd" 1 0
+(define_insn_reservation "us1_call_jmpl" 1
(and (eq_attr "cpu" "ultrasparc")
- (eq_attr "type" "fpmove"))
- 1 1)
+ (eq_attr "type" "call,sibcall,call_no_delay_slot,uncond_branch"))
+ "us1_cti + us1_ieu1 + us1_slot0")
+
+(define_insn_reservation "us1_fmov_single" 1
+ (and (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fpmove"))
+ (eq_attr "fptype" "single"))
+ "us1_fpa + us1_fp_single + us1_slotany")
+
+(define_insn_reservation "us1_fmov_double" 1
+ (and (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fpmove"))
+ (eq_attr "fptype" "double"))
+ "us1_fpa + us1_fp_double + us1_slotany")
+
+(define_insn_reservation "us1_fcmov_single" 2
+ (and (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fpcmove,fpcrmove"))
+ (eq_attr "fptype" "single"))
+ "us1_fpa + us1_fp_single + us1_slotany, nothing")
+
+(define_insn_reservation "us1_fcmov_double" 2
+ (and (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fpcmove,fpcrmove"))
+ (eq_attr "fptype" "double"))
+ "us1_fpa + us1_fp_double + us1_slotany, nothing")
+
+(define_insn_reservation "us1_faddsub_single" 4
+ (and (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fp"))
+ (eq_attr "fptype" "single"))
+ "us1_fpa + us1_fp_single + us1_slotany, nothing*3")
+
+(define_insn_reservation "us1_faddsub_double" 4
+ (and (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fp"))
+ (eq_attr "fptype" "double"))
+ "us1_fpa + us1_fp_double + us1_slotany, nothing*3")
+
+(define_insn_reservation "us1_fpcmp_single" 1
+ (and (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fpcmp"))
+ (eq_attr "fptype" "single"))
+ "us1_fpa + us1_fp_single + us1_slotany")
+
+(define_insn_reservation "us1_fpcmp_double" 1
+ (and (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fpcmp"))
+ (eq_attr "fptype" "double"))
+ "us1_fpa + us1_fp_double + us1_slotany")
+
+(define_insn_reservation "us1_fmult_single" 4
+ (and (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fpmul"))
+ (eq_attr "fptype" "single"))
+ "us1_fpm + us1_fp_single + us1_slotany, nothing*3")
+
+(define_insn_reservation "us1_fmult_double" 4
+ (and (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fpmul"))
+ (eq_attr "fptype" "double"))
+ "us1_fpm + us1_fp_double + us1_slotany, nothing*3")
+
+;; This is actually in theory dangerous, because it is possible
+;; for the chip to prematurely dispatch the dependent instruction
+;; in the G stage, resulting in a 9 cycle stall. However I have never
+;; been able to trigger this case myself even with hand written code,
+;; so it must require some rare complicated pipeline state.
+(define_bypass 3
+ "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double"
+ "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double")
+
+;; Floating point divide and square root use the multiplier unit
+;; for final rounding 3 cycles before the divide/sqrt is complete.
+
+(define_insn_reservation "us1_fdivs"
+ 13
+ (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fpdivs,fpsqrts"))
+ "(us1_fpm + us1_fdivider + us1_slot0), us1_fdivider*8, (us1_fpm + us1_fdivider), us1_fdivider*2"
+ )
-(define_function_unit "fadd" 1 0
+(define_bypass
+ 12
+ "us1_fdivs"
+ "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double")
+
+(define_insn_reservation "us1_fdivd"
+ 23
(and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fpdivd,fpsqrtd"))
+ "(us1_fpm + us1_fdivider + us1_slot0), us1_fdivider*18, (us1_fpm + us1_fdivider), us1_fdivider*2"
+ )
+(define_bypass
+ 22
+ "us1_fdivd"
+ "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double")
+
+;; Any store may multi issue with the insn creating the source
+;; data as long as that creating insn is not an FPU div/sqrt.
+;; We need a special guard function because this bypass does
+;; not apply to the address inputs of the store.
+(define_bypass 0 "us1_simple_ieuN,us1_simple_ieu1,us1_simple_ieu0,us1_faddsub_single,us1_faddsub_double,us1_fmov_single,us1_fmov_double,us1_fcmov_single,us1_fcmov_double,us1_fmult_single,us1_fmult_double" "us1_store"
+ "ultrasparc_store_bypass_p")
+
+;; An integer branch may execute in the same cycle as the compare
+;; creating the condition codes.
+(define_bypass 0 "us1_simple_ieu1" "us1_branch")
+
+;; UltraSPARC-III scheduling
+;;
+;; A much simpler beast, no silly slotting rules and both
+;; integer units are fully symmetric. It does still have
+;; single-issue instructions though.
+
+(define_cpu_unit "us3_a0,us3_a1,us3_ms,us3_br,us3_fpm" "ultrasparc3_0")
+(define_cpu_unit "us3_slot0,us3_slot1,us3_slot2,us3_slot3,us3_fpa" "ultrasparc3_1")
+(define_cpu_unit "us3_load_writeback" "ultrasparc3_1")
+
+(define_reservation "us3_slotany" "(us3_slot0 | us3_slot1 | us3_slot2 | us3_slot3)")
+(define_reservation "us3_single_issue" "us3_slot0 + us3_slot1 + us3_slot2 + us3_slot3")
+(define_reservation "us3_ax" "(us3_a0 | us3_a1)")
+
+(define_insn_reservation "us3_integer" 1
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "ialu,shift,compare"))
+ "us3_ax + us3_slotany")
+
+(define_insn_reservation "us3_cmove" 2
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "cmove"))
+ "us3_ms + us3_br + us3_slotany, nothing")
+
+;; ??? Not entirely accurate.
+;; ??? It can run from 6 to 9 cycles. The first cycle the MS pipe
+;; ??? is needed, and the instruction group is broken right after
+;; ??? the imul. Then 'helper' instructions are generated to perform
+;; ??? each further stage of the multiplication, each such 'helper' is
+;; ??? a single group. So, the reservation aspect is represented accurately
+;; ??? here, but the variable cycles are not.
+;; ??? Currently I have no idea how to determine the variability, but once
+;; ??? known we can simply add a define_bypass or similar to model it.
+(define_insn_reservation "us3_imul" 6
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "imul"))
+ "us3_ms + us3_slotany, us3_single_issue*5")
+
+(define_insn_reservation "us3_idiv" 71
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "idiv"))
+ "us3_ms + us3_slotany, us3_single_issue*70")
+
+;; UltraSPARC-III has a load delay similar to that of UltraSPARC-I/II,
+;; except that every load other than a 32-bit/64-bit unsigned load takes
+;; the extra delay for sign/zero extension.
+(define_insn_reservation "us3_2cycle_load" 2
+ (and (eq_attr "cpu" "ultrasparc3")
+ (and (eq_attr "type" "load,fpload")
+ (eq_attr "us3load_type" "2cycle")))
+ "us3_ms + us3_slotany, us3_load_writeback")
+
+(define_insn_reservation "us3_load_delayed" 3
+ (and (eq_attr "cpu" "ultrasparc3")
+ (and (eq_attr "type" "load,sload")
+ (eq_attr "us3load_type" "3cycle")))
+ "us3_ms + us3_slotany, nothing, us3_load_writeback")
+
+(define_insn_reservation "us3_store" 1
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "store,fpstore"))
+ "us3_ms + us3_slotany")
+
+(define_insn_reservation "us3_branch" 1
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "branch"))
+ "us3_br + us3_slotany")
+
+(define_insn_reservation "us3_call_jmpl" 1
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "call,sibcall,call_no_delay_slot,uncond_branch"))
+ "us3_br + us3_ms + us3_slotany")
+
+(define_insn_reservation "us3_fmov" 3
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "fpmove"))
+ "us3_fpa + us3_slotany, nothing*2")
+
+(define_insn_reservation "us3_fcmov" 3
+ (and (eq_attr "cpu" "ultrasparc3")
(eq_attr "type" "fpcmove"))
- 2 1)
+ "us3_fpa + us3_br + us3_slotany, nothing*2")
-(define_function_unit "fadd" 1 0
- (and (eq_attr "cpu" "ultrasparc")
+(define_insn_reservation "us3_fcrmov" 3
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "fpcrmove"))
+ "us3_fpa + us3_ms + us3_slotany, nothing*2")
+
+(define_insn_reservation "us3_faddsub" 4
+ (and (eq_attr "cpu" "ultrasparc3")
(eq_attr "type" "fp"))
- 3 1)
+ "us3_fpa + us3_slotany, nothing*3")
-(define_function_unit "fadd" 1 0
- (and (eq_attr "cpu" "ultrasparc")
+(define_insn_reservation "us3_fpcmp" 5
+ (and (eq_attr "cpu" "ultrasparc3")
(eq_attr "type" "fpcmp"))
- 2 1)
+ "us3_fpa + us3_slotany, nothing*4")
-(define_function_unit "fmul" 1 0
- (and (eq_attr "cpu" "ultrasparc")
+(define_insn_reservation "us3_fmult" 4
+ (and (eq_attr "cpu" "ultrasparc3")
(eq_attr "type" "fpmul"))
- 3 1)
+ "us3_fpm + us3_slotany, nothing*3")
-(define_function_unit "fadd" 1 0
- (and (eq_attr "cpu" "ultrasparc")
- (eq_attr "type" "fpcmove"))
- 2 1)
-
-(define_function_unit "fdiv" 1 0
- (and (eq_attr "cpu" "ultrasparc")
+(define_insn_reservation "us3_fdivs" 17
+ (and (eq_attr "cpu" "ultrasparc3")
(eq_attr "type" "fpdivs"))
- 12 12)
+ "(us3_fpm + us3_slotany), us3_fpm*14, nothing*2")
-(define_function_unit "fdiv" 1 0
- (and (eq_attr "cpu" "ultrasparc")
- (eq_attr "type" "fpdivd"))
- 22 22)
-
-(define_function_unit "fdiv" 1 0
- (and (eq_attr "cpu" "ultrasparc")
+(define_insn_reservation "us3_fsqrts" 20
+ (and (eq_attr "cpu" "ultrasparc3")
(eq_attr "type" "fpsqrts"))
- 12 12)
+ "(us3_fpm + us3_slotany), us3_fpm*17, nothing*2")
-(define_function_unit "fdiv" 1 0
- (and (eq_attr "cpu" "ultrasparc")
+(define_insn_reservation "us3_fdivd" 20
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "fpdivd"))
+ "(us3_fpm + us3_slotany), us3_fpm*17, nothing*2")
+
+(define_insn_reservation "us3_fsqrtd" 29
+ (and (eq_attr "cpu" "ultrasparc3")
(eq_attr "type" "fpsqrtd"))
- 22 22)
+ "(us3_fpm + us3_slotany), us3_fpm*26, nothing*2")
+
+;; Any store may multi issue with the insn creating the source
+;; data as long as that creating insn is not an FPU div/sqrt.
+;; We need a special guard function because this bypass does
+;; not apply to the address inputs of the store.
+(define_bypass 0 "us3_integer,us3_faddsub,us3_fmov,us3_fcmov,us3_fmult" "us3_store"
+ "ultrasparc_store_bypass_p")
+
+;; An integer branch may execute in the same cycle as the compare
+;; creating the condition codes.
+(define_bypass 0 "us3_integer" "us3_branch")
+
+;; If FMOVfcc is user of FPCMP, latency is only 1 cycle.
+(define_bypass 1 "us3_fpcmp" "us3_fcmov")
+
\f
;; Compare instructions.
;; This controls RTL generation and register allocation.
mov\\t%1, %0
ldub\\t%1, %0
stb\\t%r1, %0"
- [(set_attr "type" "*,load,store")])
+ [(set_attr "type" "*,load,store")
+ (set_attr "us3load_type" "*,3cycle,*")])
(define_expand "movhi"
[(set (match_operand:HI 0 "general_operand" "")
sethi\\t%%hi(%a1), %0
lduh\\t%1, %0
sth\\t%r1, %0"
- [(set_attr "type" "*,*,load,store")])
+ [(set_attr "type" "*,*,load,store")
+ (set_attr "us3load_type" "*,*,3cycle,*")])
;; We always work with constants here.
(define_insn "*movhi_lo_sum"
"@
fmovrs%D1\\t%2, %3, %0
fmovrs%d1\\t%2, %4, %0"
- [(set_attr "type" "fpcmove")])
+ [(set_attr "type" "fpcrmove")])
(define_insn "movdf_cc_reg_sp64"
[(set (match_operand:DF 0 "register_operand" "=e,e")
"@
fmovrd%D1\\t%2, %3, %0
fmovrd%d1\\t%2, %4, %0"
- [(set_attr "type" "fpcmove")
+ [(set_attr "type" "fpcrmove")
(set_attr "fptype" "double")])
(define_insn "*movtf_cc_reg_hq_sp64"
"@
fmovrq%D1\\t%2, %3, %0
fmovrq%d1\\t%2, %4, %0"
- [(set_attr "type" "fpcmove")])
+ [(set_attr "type" "fpcrmove")])
(define_insn "*movtf_cc_reg_sp64"
[(set (match_operand:TF 0 "register_operand" "=e,e")
(zero_extend:SI (match_operand:HI 1 "memory_operand" "m")))]
""
"lduh\\t%1, %0"
- [(set_attr "type" "load")])
+ [(set_attr "type" "load")
+ (set_attr "us3load_type" "3cycle")])
(define_expand "zero_extendqihi2"
[(set (match_operand:HI 0 "register_operand" "")
"@
and\\t%1, 0xff, %0
ldub\\t%1, %0"
- [(set_attr "type" "*,load")])
+ [(set_attr "type" "*,load")
+ (set_attr "us3load_type" "*,3cycle")])
(define_expand "zero_extendqisi2"
[(set (match_operand:SI 0 "register_operand" "")
"@
and\\t%1, 0xff, %0
ldub\\t%1, %0"
- [(set_attr "type" "*,load")])
+ [(set_attr "type" "*,load")
+ (set_attr "us3load_type" "*,3cycle")])
(define_expand "zero_extendqidi2"
[(set (match_operand:DI 0 "register_operand" "")
"@
and\\t%1, 0xff, %0
ldub\\t%1, %0"
- [(set_attr "type" "*,load")])
+ [(set_attr "type" "*,load")
+ (set_attr "us3load_type" "*,3cycle")])
(define_expand "zero_extendhidi2"
[(set (match_operand:DI 0 "register_operand" "")
(zero_extend:DI (match_operand:HI 1 "memory_operand" "m")))]
"TARGET_ARCH64"
"lduh\\t%1, %0"
- [(set_attr "type" "load")])
+ [(set_attr "type" "load")
+ (set_attr "us3load_type" "3cycle")])
;; ??? Write truncdisi pattern using sra?
(sign_extend:SI (match_operand:HI 1 "memory_operand" "m")))]
""
"ldsh\\t%1, %0"
- [(set_attr "type" "sload")])
+ [(set_attr "type" "sload")
+ (set_attr "us3load_type" "3cycle")])
(define_expand "extendqihi2"
[(set (match_operand:HI 0 "register_operand" "")
(sign_extend:HI (match_operand:QI 1 "memory_operand" "m")))]
""
"ldsb\\t%1, %0"
- [(set_attr "type" "sload")])
+ [(set_attr "type" "sload")
+ (set_attr "us3load_type" "3cycle")])
(define_expand "extendqisi2"
[(set (match_operand:SI 0 "register_operand" "")
(sign_extend:SI (match_operand:QI 1 "memory_operand" "m")))]
""
"ldsb\\t%1, %0"
- [(set_attr "type" "sload")])
+ [(set_attr "type" "sload")
+ (set_attr "us3load_type" "3cycle")])
(define_expand "extendqidi2"
[(set (match_operand:DI 0 "register_operand" "")
(sign_extend:DI (match_operand:QI 1 "memory_operand" "m")))]
"TARGET_ARCH64"
"ldsb\\t%1, %0"
- [(set_attr "type" "sload")])
+ [(set_attr "type" "sload")
+ (set_attr "us3load_type" "3cycle")])
(define_expand "extendhidi2"
[(set (match_operand:DI 0 "register_operand" "")
(sign_extend:DI (match_operand:HI 1 "memory_operand" "m")))]
"TARGET_ARCH64"
"ldsh\\t%1, %0"
- [(set_attr "type" "sload")])
+ [(set_attr "type" "sload")
+ (set_attr "us3load_type" "3cycle")])
(define_expand "extendsidi2"
[(set (match_operand:DI 0 "register_operand" "")
"@
sra\\t%1, 0, %0
ldsw\\t%1, %0"
- [(set_attr "type" "shift,sload")])
+ [(set_attr "type" "shift,sload")
+ (set_attr "us3load_type" "*,3cycle")])
\f
;; Special pattern for optimizing bit-field compares. This is needed
;; because combine uses this as a canonical form.
"TARGET_V9"
"t%C0\\t%%xcc, %1"
[(set_attr "type" "misc")])
+
+(define_insn "cycle_display"
+ [(unspec [(match_operand 0 "const_int_operand" "")] 20)]
+ ""
+ "! cycle %0"
+ [(set_attr "length" "0")])
various code generation improvements, work on the global optimizer, etc.
@item
-Vladimir Makarov for hacking some ugly i960 problems, PowerPC
-hacking improvements to compile-time performance and overall knowledge
-and direction in the area of instruction scheduling.
+Vladimir Makarov for hacking some ugly i960 problems, PowerPC hacking
+improvements to compile-time performance, overall knowledge and
+direction in the area of instruction scheduling, and design and
+implementation of the automaton-based instruction scheduler.
@item
Bob Manson for his behind the scenes work on dejagnu.
@cindex instruction splitting
@cindex splitting instructions
-There are two cases where you should specify how to split a pattern into
-multiple insns. On machines that have instructions requiring delay
-slots (@pxref{Delay Slots}) or that have instructions whose output is
-not available for multiple cycles (@pxref{Function Units}), the compiler
-phases that optimize these cases need to be able to move insns into
-one-instruction delay slots. However, some insns may generate more than one
-machine instruction. These insns cannot be placed into a delay slot.
+There are two cases where you should specify how to split a pattern
+into multiple insns. On machines that have instructions requiring
+delay slots (@pxref{Delay Slots}) or that have instructions whose
+output is not available for multiple cycles (@pxref{Processor pipeline
+description}), the compiler phases that optimize these cases need to
+be able to move insns into one-instruction delay slots. However, some
+insns may generate more than one machine instruction. These insns
+cannot be placed into a delay slot.
Often you can rewrite the single insn as a list of individual insns,
each corresponding to one machine instruction. The disadvantage of
* Insn Lengths:: Computing the length of insns.
* Constant Attributes:: Defining attributes that are constant.
* Delay Slots:: Defining delay slots required for a machine.
-* Function Units:: Specifying information for insn scheduling.
+* Processor pipeline description:: Specifying information for insn scheduling.
@end menu
@node Defining Attributes
@end smallexample
@c the above is *still* too long. --mew 4feb93
-@node Function Units
-@subsection Specifying Function Units
+@node Processor pipeline description
+@subsection Specifying processor pipeline description
+@cindex processor pipeline description
+@cindex processor functional units
+@cindex instruction latency time
+@cindex interlock delays
+@cindex data dependence delays
+@cindex reservation delays
+@cindex pipeline hazard recognizer
+@cindex automaton based pipeline description
+@cindex regular expressions
+@cindex deterministic finite state automaton
+@cindex automaton based scheduler
+@cindex RISC
+@cindex VLIW
+
+To achieve better productivity most modern processors
+(super-pipelined, superscalar @acronym{RISC}, and @acronym{VLIW}
+processors) have many @dfn{functional units} on which several
+instructions can be executed simultaneously. An instruction starts
+execution if its issue conditions are satisfied. If not, the
+instruction is interlocked until its conditions are satisfied. Such
+@dfn{interlock (pipeline) delay} causes interruption of the fetching
+of successor instructions (or demands nop instructions, e.g. for some
+MIPS processors).
+
+There are two major kinds of interlock delays in modern processors.
+The first one is a data dependence delay determining @dfn{instruction
+latency time}. The instruction execution is not started until all
+source data have been evaluated by prior instructions (there are more
+complex cases when the instruction execution starts even when the data
+are not available but will be ready in a given time after the
+instruction execution start). Taking the data dependence delays into
+account is simple. The data dependence (true, output, and
+anti-dependence) delay between two instructions is given by a
+constant. In most cases this approach is adequate. The second kind
+of interlock delays is a reservation delay. The reservation delay
+means that two instructions under execution will be in need of shared
+processor resources, i.e. buses, internal registers, and/or
+functional units, which are reserved for some time. Taking this kind
+of delay into account is complex especially for modern @acronym{RISC}
+processors.
+
+The task of exploiting more processor parallelism is solved by an
+instruction scheduler. For better solution of this problem, the
+instruction scheduler has to have an adequate description of the
+processor parallelism (or @dfn{pipeline description}). Currently GCC
+has two ways to describe processor parallelism. The first one is old
+and originated from instruction scheduler written by Michael Tiemann
+and described in the first subsequent section. The second one was
+created later. It is based on description of functional unit
+reservations by processor instructions with the aid of @dfn{regular
+expressions}. This is so called @dfn{automaton based description}.
+
+The GCC instruction scheduler uses a @dfn{pipeline hazard recognizer} to
+figure out the possibility of the instruction issue by the processor
+on a given simulated processor cycle. The pipeline hazard recognizer is
+a code generated from the processor pipeline description. The
+pipeline hazard recognizer generated from the automaton based
+description is more sophisticated and based on deterministic finite
+state automaton (@acronym{DFA}) and therefore faster than one
+generated from the old description. Also its speed does not depend on
+processor complexity. The instruction issue is possible if there is
+a transition from one automaton state to another one.
+
+You can use either model to describe processor pipeline characteristics
+or even a mix of them. You could use the old description for some
+processor submodels and the @acronym{DFA}-based one for the remaining
+processor submodels.
+
+In general, using the automaton based description is preferable.
+Its model is richer. It permits a more accurate description of the
+pipeline characteristics of processors, which results in improved
+code quality (although sometimes only by a fraction of a percent).
+It will also be used as an infrastructure to implement
+sophisticated and practical insn scheduling which will try many
+instruction sequences to choose the best one.
+
+
+@menu
+* Old pipeline description:: Specifying information for insn scheduling.
+* Automaton pipeline description:: Describing insn pipeline characteristics.
+* Comparison of the two descriptions:: Drawbacks of the old pipeline description
+@end menu
+
+@node Old pipeline description
+@subsubsection Specifying Function Units
+@cindex old pipeline description
@cindex function units, for scheduling
-On most RISC machines, there are instructions whose results are not
-available for a specific number of cycles. Common cases are instructions
-that load data from memory. On many machines, a pipeline stall will result
-if the data is referenced too soon after the load instruction.
+On most @acronym{RISC} machines, there are instructions whose results
+are not available for a specific number of cycles. Common cases are
+instructions that load data from memory. On many machines, a pipeline
+stall will result if the data is referenced too soon after the load
+instruction.
In addition, many newer microprocessors have multiple function units, usually
one for integer and one for floating point, and often will incur pipeline
For the purposes of the specifications in this section, a machine is
divided into @dfn{function units}, each of which execute a specific
-class of instructions in first-in-first-out order. Function units that
-accept one instruction each cycle and allow a result to be used in the
-succeeding instruction (usually via forwarding) need not be specified.
-Classic RISC microprocessors will normally have a single function unit,
-which we can call @samp{memory}. The newer ``superscalar'' processors
-will often have function units for floating point operations, usually at
-least a floating point adder and multiplier.
+class of instructions in first-in-first-out order. Function units
+that accept one instruction each cycle and allow a result to be used
+in the succeeding instruction (usually via forwarding) need not be
+specified. Classic @acronym{RISC} microprocessors will normally have
+a single function unit, which we can call @samp{memory}. The newer
+``superscalar'' processors will often have function units for floating
+point operations, usually at least a floating point adder and
+multiplier.
@findex define_function_unit
Each usage of a function units by a class of insns is specified with a
pipeline either single- or double-precision operations, but not both, or
where a memory unit can pipeline loads, but not stores, etc.
-As an example, consider a classic RISC machine where the result of a
-load instruction is not available for two cycles (a single ``delay''
-instruction is required) and where only one load instruction can be executed
-simultaneously. This would be specified as:
+As an example, consider a classic @acronym{RISC} machine where the
+result of a load instruction is not available for two cycles (a single
+``delay'' instruction is required) and where only one load instruction
+can be executed simultaneously. This would be specified as:
@smallexample
(define_function_unit "memory" 1 1 (eq_attr "type" "load") 2 0)
conflict. We welcome any examples of how function unit conflicts work
in such processors and suggestions for their representation.
+@node Automaton pipeline description
+@subsubsection Describing instruction pipeline characteristics
+@cindex automaton based pipeline description
+
+This section describes constructions of the automaton based processor
+pipeline description. The order of all mentioned below constructions
+in the machine description file is not important.
+
+@findex define_automaton
+@cindex pipeline hazard recognizer
+The following optional construction describes names of automata
+generated and used for the pipeline hazards recognition. Sometimes
+the generated finite state automaton used by the pipeline hazard
+recognizer is large. If we use more than one automaton and bind functional
+units to the automata, the total size of the automata usually is
+less than the size of the single automaton. If there is no such
+construction, only one finite state automaton is generated.
+
+@smallexample
+(define_automaton @var{automata-names})
+@end smallexample
+
+@var{automata-names} is a string giving names of the automata. The
+names are separated by commas. All the automata should have unique names.
+The automaton name is used in construction @code{define_cpu_unit} and
+@code{define_query_cpu_unit}.
+
+@findex define_cpu_unit
+@cindex processor functional units
+Each processor functional unit used in description of instruction
+reservations should be described by the following construction.
+
+@smallexample
+(define_cpu_unit @var{unit-names} [@var{automaton-name}])
+@end smallexample
+
+@var{unit-names} is a string giving the names of the functional units
+separated by commas. Don't use name @samp{nothing}, it is reserved
+for other goals.
+
+@var{automaton-name} is a string giving the name of automaton with
+which the unit is bound. The automaton should be described in
+construction @code{define_automaton}. You should give
+@var{automaton-name}, if there is a defined automaton.
+
+@findex define_query_cpu_unit
+@cindex querying function unit reservations
+The following construction describes CPU functional units analogously
+to @code{define_cpu_unit}. If we use automata without their
+minimization, the reservation of such units can be queried for an
+automaton state. The instruction scheduler never queries reservation
+of functional units for given automaton state. So as a rule, you
+don't need this construction. This construction could be used for
+future code generation goals (e.g. to generate @acronym{VLIW} insn
+templates).
+
+@smallexample
+(define_query_cpu_unit @var{unit-names} [@var{automaton-name}])
+@end smallexample
+
+@var{unit-names} is a string giving names of the functional units
+separated by commas.
+
+@var{automaton-name} is a string giving name of the automaton with
+which the unit is bound.
+
+@findex define_insn_reservation
+@cindex instruction latency time
+@cindex regular expressions
+@cindex data bypass
+The following construction is the major one used to describe pipeline
+characteristics of an instruction.
+
+@smallexample
+(define_insn_reservation @var{insn-name} @var{default_latency}
+ @var{condition} @var{regexp})
+@end smallexample
+
+@var{default_latency} is a number giving latency time of the
+instruction. There is an important difference between the old
+description and the automaton based pipeline description. The latency
+time is used for all dependencies when we use the old description. In
+the automaton based pipeline description, given latency time is used
+only for true dependencies. The cost of anti-dependencies is always
+zero and the cost of output dependencies is the difference between
+latency times of the producing and consuming insns (if the difference
+is negative, the cost is considered to be zero). You always can
+change the default costs for any description by using target hook
+@code{TARGET_SCHED_ADJUST_COST} (@pxref{Scheduling}).
+
+@var{insn-name} is a string giving the internal name of the insn. The
+internal names are used in constructions @code{define_bypass} and in
+the automaton description file generated for debugging. The internal
+name has nothing in common with the names in @code{define_insn}. It is a
+good practice to use insn classes described in the processor manual.
+
+@var{condition} defines what RTL insns are described by this
+construction. You should remember that you will be in trouble if
+@var{condition} for two or more different
+@code{define_insn_reservation} constructions is TRUE for an insn. In
+this case what reservation will be used for the insn is not defined.
+Such cases are not checked during generation of the pipeline hazards
+recognizer because in general recognizing that two conditions may have
+the same value is quite difficult (especially if the conditions
+contain @code{symbol_ref}). It is also not checked during the
+pipeline hazard recognizer work because it would slow down the
+recognizer considerably.
+
+@var{regexp} is a string describing reservation of the cpu functional
+units by the instruction. The reservations are described by a regular
+expression according to the following syntax:
+
+@smallexample
+ regexp = regexp "," oneof
+ | oneof
+
+ oneof = oneof "|" allof
+ | allof
+
+ allof = allof "+" repeat
+ | repeat
+
+ repeat = element "*" number
+ | element
+
+ element = cpu_function_unit_name
+ | reservation_name
+ | result_name
+ | "nothing"
+ | "(" regexp ")"
+@end smallexample
+
+@itemize @bullet
+@item
+@samp{,} is used for describing the start of the next cycle in
+the reservation.
+
+@item
+@samp{|} is used for describing a reservation described by the first
+regular expression @strong{or} a reservation described by the second
+regular expression @strong{or} etc.
+
+@item
+@samp{+} is used for describing a reservation described by the first
+regular expression @strong{and} a reservation described by the
+second regular expression @strong{and} etc.
+
+@item
+@samp{*} is used for convenience and simply means a sequence in which
+the regular expression is repeated @var{number} times with cycle
+advancing (see @samp{,}).
+
+@item
+@samp{cpu_function_unit_name} denotes reservation of the named
+functional unit.
+
+@item
+@samp{reservation_name} --- see description of construction
+@samp{define_reservation}.
+
+@item
+@samp{nothing} denotes no unit reservations.
+@end itemize
+
+@findex define_reservation
+Sometimes unit reservations for different insns contain common parts.
+In such case, you can simplify the pipeline description by describing
+the common part by the following construction
+
+@smallexample
+(define_reservation @var{reservation-name} @var{regexp})
+@end smallexample
+
+@var{reservation-name} is a string giving name of @var{regexp}.
+Functional unit names and reservation names are in the same name
+space. So the reservation names should be different from the
+functional unit names and can not be reserved name @samp{nothing}.
+
+@findex define_bypass
+@cindex instruction latency time
+@cindex data bypass
+The following construction is used to describe exceptions in the
+latency time for a given instruction pair. These are so-called bypasses.
+
+@smallexample
+(define_bypass @var{number} @var{out_insn_names} @var{in_insn_names}
+ [@var{guard}])
+@end smallexample
+
+@var{number} defines when the result generated by the instructions
+given in string @var{out_insn_names} will be ready for the
+instructions given in string @var{in_insn_names}. The instructions in
+the string are separated by commas.
+
+@var{guard} is an optional string giving name of a C function which
+defines an additional guard for the bypass. The function will get the
+two insns as parameters. If the function returns zero the bypass will
+be ignored for this case. The additional guard is necessary to
+recognize complicated bypasses, e.g. when consumer is only an address
+of insn @samp{store} (not a stored value).
+
+@findex exclusion_set
+@findex presence_set
+@findex absence_set
+@cindex VLIW
+@cindex RISC
+Usually the following three constructions are used to describe
+@acronym{VLIW} processors (more correctly to describe a placement of
+small insns into @acronym{VLIW} insn slots). Although they can be
+used for @acronym{RISC} processors too.
+
+@smallexample
+(exclusion_set @var{unit-names} @var{unit-names})
+(presence_set @var{unit-names} @var{unit-names})
+(absence_set @var{unit-names} @var{unit-names})
+@end smallexample
+
+@var{unit-names} is a string giving names of functional units
+separated by commas.
+
+The first construction (@samp{exclusion_set}) means that each
+functional unit in the first string can not be reserved simultaneously
+with a unit whose name is in the second string and vice versa. For
+example, the construction is useful for describing processors
+(e.g. some SPARC processors) with a fully pipelined floating point
+functional unit which can execute simultaneously only single floating
+point insns or only double floating point insns.
+
+The second construction (@samp{presence_set}) means that each
+functional unit in the first string can not be reserved unless at
+least one of units whose names are in the second string is reserved.
+This is an asymmetric relation. For example, it is useful for
+description that @acronym{VLIW} @samp{slot1} is reserved after
+@samp{slot0} reservation.
+
+The third construction (@samp{absence_set}) means that each functional
+unit in the first string can be reserved only if each unit whose name
+is in the second string is not reserved. This is an asymmetric
+relation (actually @samp{exclusion_set} is analogous to this one but
+it is symmetric). For example, it is useful for description that
+@acronym{VLIW} @samp{slot0} can not be reserved after @samp{slot1} or
+@samp{slot2} reservation.
+
+All functional units mentioned in a set should belong to the same
+automaton.
+
+@findex automata_option
+@cindex deterministic finite state automaton
+@cindex nondeterministic finite state automaton
+@cindex finite state automaton minimization
+You can control the generator of the pipeline hazard recognizer with
+the following construction.
+
+@smallexample
+(automata_option @var{options})
+@end smallexample
+
+@var{options} is a string giving options which affect the generated
+code. Currently there are the following options:
+
+@itemize @bullet
+@item
+@dfn{no-minimization} makes no minimization of the automaton. This is
+only worth doing when we are going to query CPU functional unit
+reservations in an automaton state.
+
+@item
+@dfn{w} means a generation of the file describing the result
+automaton. The file can be used to verify the description.
+
+@item
+@dfn{ndfa} makes nondeterministic finite state automata. This affects
+the treatment of operator @samp{|} in the regular expressions. The
+usual treatment of the operator is to try the first alternative and,
+if the reservation is not possible, the second alternative. The
+nondeterministic treatment means trying all alternatives, some of them
+may be rejected by reservations in the subsequent insns. You can not
+query functional unit reservations in nondeterministic automaton
+states.
+@end itemize
+
+As an example, consider a superscalar @acronym{RISC} machine which can
+issue three insns (two integer insns and one floating point insn) on
+the cycle but can finish only two insns. To describe this, we define
+the following functional units.
+
+@smallexample
+(define_cpu_unit "i0_pipeline, i1_pipeline, f_pipeline")
+(define_cpu_unit "port_0, port1")
+@end smallexample
+
+All simple integer insns can be executed in any integer pipeline and
+their result is ready in two cycles. The simple integer insns are
+issued into the first pipeline unless it is reserved, otherwise they
+are issued into the second pipeline. Integer division and
+multiplication insns can be executed only in the second integer
+pipeline and their results are ready correspondingly in 8 and 4
+cycles. The integer division is not pipelined, i.e. the subsequent
+integer division insn can not be issued until the current division
+insn finished. Floating point insns are fully pipelined and their
+results are ready in 3 cycles. There is also additional one cycle
+delay in the usage by integer insns of result produced by floating
+point insns. To describe all of this we could specify
+
+@smallexample
+(define_cpu_unit "div")
+
+(define_insn_reservation "simple" 2 (eq_attr "cpu" "int")
+ "(i0_pipeline | i1_pipeline), (port_0 | port1)")
+
+(define_insn_reservation "mult" 4 (eq_attr "cpu" "mult")
+ "i1_pipeline, nothing*2, (port_0 | port1)")
+
+(define_insn_reservation "div" 8 (eq_attr "cpu" "div")
+ "i1_pipeline, div*7, div + (port_0 | port1)")
+
+(define_insn_reservation "float" 3 (eq_attr "cpu" "float")
+ "f_pipeline, nothing, (port_0 | port1)")
+
+(define_bypass 4 "float" "simple,mult,div")
+@end smallexample
+
+To simplify the description we could describe the following reservation
+
+@smallexample
+(define_reservation "finish" "port_0|port1")
+@end smallexample
+
+and use it in all @code{define_insn_reservation} as in the following
+construction
+
+@smallexample
+(define_insn_reservation "simple" 2 (eq_attr "cpu" "int")
+ "(i0_pipeline | i1_pipeline), finish")
+@end smallexample
+
+
+@node Comparison of the two descriptions
+@subsubsection Drawbacks of the old pipeline description
+@cindex old pipeline description
+@cindex automaton based pipeline description
+@cindex processor functional units
+@cindex interlock delays
+@cindex instruction latency time
+@cindex pipeline hazard recognizer
+@cindex data bypass
+
+The old instruction level parallelism description and the pipeline
+hazards recognizer based on it have the following drawbacks in
+comparison with the @acronym{DFA}-based ones:
+
+@itemize @bullet
+@item
+Each functional unit is believed to be reserved at the instruction
+execution start. This is a very inaccurate model for modern
+processors.
+
+@item
+An inadequate description of instruction latency times. The latency
+time is bound with a functional unit reserved by an instruction not
+with the instruction itself. In other words, the description is
+oriented to describe at most one unit reservation by each instruction.
+It also does not permit to describe special bypasses between
+instruction pairs.
+
+@item
+The implementation of the pipeline hazard recognizer interface has
+constraints on number of functional units. This is a number of bits
+in integer on the host machine.
+
+@item
+The interface to the pipeline hazard recognizer is more complex than
+one to the automaton based pipeline recognizer.
+
+@item
+An unnatural description when you write a unit and a condition which
+selects instructions using the unit. Writing all unit reservations
+for an instruction (an instruction class) is more natural.
+
+@item
+The recognition of the interlock delays has a slow implementation. The GCC
+scheduler supports structures which describe the unit reservations.
+The more functional units a processor has, the slower the pipeline hazard
+recognizer. Such an implementation would become slower still if we allowed
+functional units to be reserved not only at the instruction execution start.
+The speed of the automaton based pipeline hazard recognizer does not depend
+on processor complexity.
+@end itemize
+
@node Conditional Execution
@section Conditional Execution
@cindex conditional execution
attributes defined for a particular machine is in file
@file{insn-attr.h}, which is generated from the machine description by
the program @file{genattr}. The file @file{insn-attrtab.c} contains
-subroutines to obtain the attribute values for insns. It is generated
-from the machine description by the program @file{genattrtab}.
+subroutines to obtain the attribute values for insns and information
+about processor pipeline characteristics for the instruction
+scheduler. It is generated from the machine description by the
+program @file{genattrtab}.
@end itemize
them: try the first ones in this list first.
@deftypefn {Target Hook} int TARGET_SCHED_ISSUE_RATE (void)
-This hook returns the maximum number of instructions that can ever issue
-at the same time on the target machine. The default is one. This value
-must be constant over the entire compilation. If you need it to vary
-depending on what the instructions are, you must use
+This hook returns the maximum number of instructions that can ever
+issue at the same time on the target machine. The default is one.
+Although the insn scheduler can itself determine the possibility of issuing
+an insn on the same cycle, the value can serve as an additional
+constraint to issue insns on the same simulated processor cycle (see
+hooks @samp{TARGET_SCHED_REORDER} and @samp{TARGET_SCHED_REORDER2}).
+This value must be constant over the entire compilation. If you need
+it to vary depending on what the instructions are, you must use
@samp{TARGET_SCHED_VARIABLE_ISSUE}.
+
+You could use the value of macro @samp{MAX_DFA_ISSUE_RATE} to return
+the value of the hook @samp{TARGET_SCHED_ISSUE_RATE} for the automaton
+based pipeline interface.
@end deftypefn
@deftypefn {Target Hook} int TARGET_SCHED_VARIABLE_ISSUE (FILE *@var{file}, int @var{verbose}, rtx @var{insn}, int @var{more})
@end deftypefn
@deftypefn {Target Hook} int TARGET_SCHED_ADJUST_COST (rtx @var{insn}, rtx @var{link}, rtx @var{dep_insn}, int @var{cost})
-This function corrects the value of @var{cost} based on the relationship
-between @var{insn} and @var{dep_insn} through the dependence @var{link}.
-It should return the new value. The default is to make no adjustment to
-@var{cost}. This can be used for example to specify to the scheduler
+This function corrects the value of @var{cost} based on the
+relationship between @var{insn} and @var{dep_insn} through the
+dependence @var{link}. It should return the new value. The default
+is to make no adjustment to @var{cost}. This can be used for example
+to specify to the scheduler using the traditional pipeline description
that an output- or anti-dependence does not incur the same cost as a
-data-dependence.
+data-dependence. If the scheduler is using the automaton based pipeline
+description, the cost of anti-dependence is zero and the cost of
+output-dependence is maximum of one and the difference of latency
+times of the first and the second insns. If these values are not
+acceptable, you could use the hook to modify them too. See also
+@ref{Automaton pipeline description}.
@end deftypefn
@deftypefn {Target Hook} int TARGET_SCHED_ADJUST_PRIORITY (rtx @var{insn}, int @var{priority})
level of detail about what the scheduler is doing.
@end deftypefn
+@deftypefn {Target Hook} int TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE (void)
+This hook is called many times during insn scheduling. If the hook
+returns nonzero, the automaton based pipeline description is used for
+insn scheduling. Otherwise the traditional pipeline description is
+used. The default is usage of the traditional pipeline description.
+
+You should also remember that to simplify the insn scheduler sources
+an empty traditional pipeline description interface is generated even
+if there is no traditional pipeline description in the @file{.md}
+file. The same is true for the automaton based pipeline description.
+That means that you should be accurate in defining the hook.
+@end deftypefn
+
+@deftypefn {Target Hook} rtx TARGET_SCHED_DFA_PRE_CYCLE_INSN (void)
+The hook returns an RTL insn. The automaton state used in the
+pipeline hazard recognizer is changed as if the insn were scheduled
+when the new simulated processor cycle starts. Usage of the hook may
+simplify the automaton pipeline description for some @acronym{VLIW}
+processors. If the hook is defined, it is used only for the automaton
+based pipeline description. The default is not to change the state
+when the new simulated processor cycle starts.
+@end deftypefn
+
+@deftypefn {Target Hook} void TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN (void)
+The hook can be used to initialize data used by the previous hook.
+@end deftypefn
+
+@deftypefn {Target Hook} rtx TARGET_SCHED_DFA_POST_CYCLE_INSN (void)
+The hook is analogous to @samp{TARGET_SCHED_DFA_PRE_CYCLE_INSN} but used
+to change the state as if the insn were scheduled when the new
+simulated processor cycle finishes.
+@end deftypefn
+
+@deftypefn {Target Hook} void TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN (void)
+The hook is analogous to @samp{TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN} but
+used to initialize data used by the previous hook.
+@end deftypefn
+
+@deftypefn {Target Hook} int TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD (void)
+This hook controls better choosing an insn from the ready insn queue
+for the @acronym{DFA}-based insn scheduler. Usually the scheduler
+chooses the first insn from the queue. If the hook returns a positive
+value, an additional scheduler code tries all permutations of
+@samp{TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ()}
+subsequent ready insns to choose an insn whose issue will result in
+maximal number of issued insns on the same cycle. For the
+@acronym{VLIW} processor, the code could actually solve the problem of
+packing simple insns into the @acronym{VLIW} insn. Of course, if the
+rules of @acronym{VLIW} packing are described in the automaton.
+
+This code also could be used for superscalar @acronym{RISC}
+processors. Let us consider a superscalar @acronym{RISC} processor
+with 3 pipelines. Some insns can be executed in pipelines @var{A} or
+@var{B}, some insns can be executed only in pipelines @var{B} or
+@var{C}, and one insn can be executed in pipeline @var{B}. The
+processor may issue the 1st insn into @var{A} and the 2nd one into
+@var{B}. In this case, the 3rd insn will wait for freeing @var{B}
+until the next cycle. If the scheduler issues the 3rd insn first,
+the processor could issue all 3 insns per cycle.
+
+Actually this code demonstrates advantages of the automaton based
+pipeline hazard recognizer. We quickly and easily try many insn
+schedules to choose the best one.
+
+The default is no multipass scheduling.
+@end deftypefn
+
+@deftypefn {Target Hook} void TARGET_SCHED_INIT_DFA_BUBBLES (void)
+The @acronym{DFA}-based scheduler could take the insertion of nop
+operations for better insn scheduling into account. It can be done
+only if the multi-pass insn scheduling works (see hook
+@samp{TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD}).
+
+Let us consider a @acronym{VLIW} processor insn with 3 slots. Each
+insn can be placed only in one of the three slots. We have 3 ready
+insns @var{A}, @var{B}, and @var{C}. @var{A} and @var{C} can be
+placed only in the 1st slot, @var{B} can be placed only in the 3rd
+slot. We described the automaton which does not permit empty slot
+gaps between insns (usually such description is simpler). Without
+this code the scheduler would place each insn in 3 separate
+@acronym{VLIW} insns. If the scheduler places a nop insn into the 2nd
+slot, it could place the 3 insns into 2 @acronym{VLIW} insns. The nop
+insn to use is returned by hook @samp{TARGET_SCHED_DFA_BUBBLE}. Hook
+@samp{TARGET_SCHED_INIT_DFA_BUBBLES} can be used to initialize or
+create the nop insns.
+
+You should remember that the scheduler does not insert the nop insns.
+It is not wise because of the following optimizations. The scheduler
+only considers such possibility to improve the result schedule. The
+nop insns should be inserted later, e.g. in the final phase.
+@end deftypefn
+
+@deftypefn {Target Hook} rtx TARGET_SCHED_DFA_BUBBLE (int @var{index})
+This hook @samp{FIRST_CYCLE_MULTIPASS_SCHEDULING} is used to insert
+nop operations for better insn scheduling when @acronym{DFA}-based
+scheduler makes multipass insn scheduling (see also description of
+hook @samp{TARGET_SCHED_INIT_DFA_BUBBLES}). This hook
+returns a nop insn with given @var{index}. The indexes start with
+zero. The hook should return @code{NULL} if there are no more nop
+insns with indexes greater than given index.
+@end deftypefn
+
+Macros in the following table are generated by the program
+@file{genattr} and can be useful for writing the hooks.
+
+@table @code
+@findex TRADITIONAL_PIPELINE_INTERFACE
+@item TRADITIONAL_PIPELINE_INTERFACE
+The macro definition is generated if there is a traditional pipeline
+description in @file{.md} file. You should also remember that to
+simplify the insn scheduler sources an empty traditional pipeline
+description interface is generated even if there is no traditional
+pipeline description in the @file{.md} file. The macro can be used to
+distinguish the two types of the traditional interface.
+
+@findex DFA_PIPELINE_INTERFACE
+@item DFA_PIPELINE_INTERFACE
+The macro definition is generated if there is an automaton pipeline
+description in @file{.md} file. You should also remember that to
+simplify the insn scheduler sources an empty automaton pipeline
+description interface is generated even if there is no automaton
+pipeline description in the @file{.md} file. The macro can be used to
+distinguish the two types of the automaton interface.
+
+@findex MAX_DFA_ISSUE_RATE
+@item MAX_DFA_ISSUE_RATE
+The macro definition is generated in the automaton based pipeline
+description interface. Its value is calculated from the automaton
+based pipeline description and is equal to maximal number of all insns
+described in constructions @samp{define_insn_reservation} which can be
+issued on the same processor cycle.
+
+@end table
+
@node Sections
@section Dividing the Output into Sections (Texts, Data, @dots{})
@c the above section title is WAY too long. maybe cut the part between
int have_delay = 0;
int have_annul_true = 0;
int have_annul_false = 0;
+ int num_insn_reservations = 0;
int num_units = 0;
struct range all_simultaneity, all_multiplicity;
struct range all_ready_cost, all_issue_delay, all_blockage;
extend_range (&all_issue_delay,
unit->issue_delay.min, unit->issue_delay.max);
}
+ else if (GET_CODE (desc) == DEFINE_INSN_RESERVATION)
+ num_insn_reservations++;
}
- if (num_units > 0)
+ if (num_units > 0 || num_insn_reservations > 0)
{
+ if (num_units > 0)
+ printf ("#define TRADITIONAL_PIPELINE_INTERFACE 1\n");
+
+ if (num_insn_reservations > 0)
+ printf ("#define DFA_PIPELINE_INTERFACE 1\n");
+
/* Compute the range of blockage cost values. See genattrtab.c
for the derivation. BLOCKAGE (E,C) when SIMULTANEITY is zero is
write_units (num_units, &all_multiplicity, &all_simultaneity,
&all_ready_cost, &all_issue_delay, &all_blockage);
+
+ /* Output interface for pipeline hazards recognition based on
+ DFA (deterministic finite state automata). */
+ printf ("\n/* DFA based pipeline interface. */");
+ printf ("\n#ifndef AUTOMATON_STATE_ALTS\n");
+ printf ("#define AUTOMATON_STATE_ALTS 0\n");
+ printf ("#endif\n\n");
+ printf ("#ifndef CPU_UNITS_QUERY\n");
+ printf ("#define CPU_UNITS_QUERY 0\n");
+ printf ("#endif\n\n");
+ /* Interface itself: */
+ printf ("extern int max_dfa_issue_rate;\n\n");
+ printf ("/* The following macro value is calculated from the\n");
+ printf (" automaton based pipeline description and is equal to\n");
+ printf (" maximal number of all insns described in constructions\n");
+ printf (" `define_insn_reservation' which can be issued on the\n");
+ printf (" same processor cycle. */\n");
+ printf ("#define MAX_DFA_ISSUE_RATE max_dfa_issue_rate\n\n");
+ printf ("/* Insn latency time defined in define_insn_reservation. */\n");
+ printf ("extern int insn_default_latency PARAMS ((rtx));\n\n");
+ printf ("/* Return nonzero if there is a bypass for given insn\n");
+ printf (" which is a data producer. */\n");
+ printf ("extern int bypass_p PARAMS ((rtx));\n\n");
+ printf ("/* Insn latency time on data consumed by the 2nd insn.\n");
+ printf (" Use the function if bypass_p returns nonzero for\n");
+ printf (" the 1st insn. */\n");
+ printf ("extern int insn_latency PARAMS ((rtx, rtx));\n\n");
+ printf ("/* The following function returns number of alternative\n");
+ printf (" reservations of given insn. It may be used for better\n");
+ printf (" insns scheduling heuristics. */\n");
+ printf ("extern int insn_alts PARAMS ((rtx));\n\n");
+ printf ("/* Maximal possible number of insns waiting results being\n");
+ printf (" produced by insns whose execution is not finished. */\n");
+ printf ("extern int max_insn_queue_index;\n\n");
+ printf ("/* Pointer to data describing current state of DFA. */\n");
+ printf ("typedef void *state_t;\n\n");
+ printf ("/* Size of the data in bytes. */\n");
+ printf ("extern int state_size PARAMS ((void));\n\n");
+ printf ("/* Initiate given DFA state, i.e. Set up the state\n");
+ printf (" as all functional units were not reserved. */\n");
+ printf ("extern void state_reset PARAMS ((state_t));\n");
+ printf ("/* The following function returns negative value if given\n");
+ printf (" insn can be issued in processor state described by given\n");
+ printf (" DFA state. In this case, the DFA state is changed to\n");
+ printf (" reflect the current and future reservations by given\n");
+ printf (" insn. Otherwise the function returns minimal time\n");
+ printf (" delay to issue the insn. This delay may be zero\n");
+ printf (" for superscalar or VLIW processors. If the second\n");
+ printf (" parameter is NULL the function changes given DFA state\n");
+ printf (" as new processor cycle started. */\n");
+ printf ("extern int state_transition PARAMS ((state_t, rtx));\n");
+ printf ("\n#if AUTOMATON_STATE_ALTS\n");
+ printf ("/* The following function returns number of possible\n");
+ printf (" alternative reservations of given insn in given\n");
+ printf (" DFA state. It may be used for better insns scheduling\n");
+ printf (" heuristics. By default the function is defined if\n");
+ printf (" macro AUTOMATON_STATE_ALTS is defined because its\n");
+ printf (" implementation may require much memory. */\n");
+ printf ("extern int state_alts PARAMS ((state_t, rtx));\n");
+ printf ("#endif\n\n");
+ printf ("extern int min_issue_delay PARAMS ((state_t, rtx));\n");
+ printf ("/* The following function returns nonzero if no one insn\n");
+ printf (" can be issued in current DFA state. */\n");
+ printf ("extern int state_dead_lock_p PARAMS ((state_t));\n");
+ printf ("/* The function returns minimal delay of issue of the 2nd\n");
+ printf (" insn after issuing the 1st insn in given DFA state.\n");
+ printf (" The 1st insn should be issued in given state (i.e.\n");
+ printf (" state_transition should return negative value for\n");
+ printf (" the insn and the state). Data dependencies between\n");
+ printf (" the insns are ignored by the function. */\n");
+ printf
+ ("extern int min_insn_conflict_delay PARAMS ((state_t, rtx, rtx));\n");
+ printf ("/* The following function outputs reservations for given\n");
+ printf (" insn as they are described in the corresponding\n");
+ printf (" define_insn_reservation. */\n");
+ printf ("extern void print_reservation PARAMS ((FILE *, rtx));\n");
+ printf ("\n#if CPU_UNITS_QUERY\n");
+ printf ("/* The following function returns code of functional unit\n");
+ printf (" with given name (see define_cpu_unit). */\n");
+ printf ("extern int get_cpu_unit_code PARAMS ((const char *));\n");
+ printf ("/* The following function returns nonzero if functional\n");
+ printf (" unit with given code is currently reserved in given\n");
+ printf (" DFA state. */\n");
+ printf ("extern int cpu_unit_reservation_p PARAMS ((state_t, int));\n");
+ printf ("#endif\n\n");
+ printf ("/* Initiate and finish work with DFA. They should be\n");
+ printf (" called as the first and the last interface\n");
+ printf (" functions. */\n");
+ printf ("extern void dfa_start PARAMS ((void));\n");
+ printf ("extern void dfa_finish PARAMS ((void));\n");
+ }
+ else
+ {
+ /* Otherwise we do no scheduling, but we need these typedefs
+ in order to avoid uglifying other code with more ifdefs. */
+ printf ("typedef void *state_t;\n\n");
}
/* Output flag masks for use by reorg.
#include "obstack.h"
#include "errors.h"
+#include "genattrtab.h"
+
static struct obstack obstack1, obstack2;
struct obstack *hash_obstack = &obstack1;
struct obstack *temp_obstack = &obstack2;
static int num_units, num_unit_opclasses;
static int num_insn_ents;
+int num_dfa_decls;
+
/* Used as operand to `operate_exp': */
enum operator {PLUS_OP, MINUS_OP, POS_MINUS_OP, EQ_OP, OR_OP, ORX_OP, MAX_OP, MIN_OP, RANGE_OP};
static void attr_hash_add_string PARAMS ((int, char *));
static rtx attr_rtx PARAMS ((enum rtx_code, ...));
static rtx attr_rtx_1 PARAMS ((enum rtx_code, va_list));
-static char *attr_printf PARAMS ((unsigned int, const char *, ...))
- ATTRIBUTE_PRINTF_2;
static char *attr_string PARAMS ((const char *, int));
-static rtx check_attr_test PARAMS ((rtx, int, int));
static rtx check_attr_value PARAMS ((rtx, struct attr_desc *));
static rtx convert_set_attr_alternative PARAMS ((rtx, struct insn_def *));
static rtx convert_set_attr PARAMS ((rtx, struct insn_def *));
static int n_comma_elts PARAMS ((const char *));
static char *next_comma_elt PARAMS ((const char **));
static struct attr_desc *find_attr PARAMS ((const char *, int));
-static void make_internal_attr PARAMS ((const char *, rtx, int));
static struct attr_value *find_most_used PARAMS ((struct attr_desc *));
static rtx find_single_value PARAMS ((struct attr_desc *));
-static rtx make_numeric_value PARAMS ((int));
static void extend_range PARAMS ((struct range *, int, int));
static rtx attr_eq PARAMS ((const char *, const char *));
static const char *attr_numeral PARAMS ((int));
rtx attr_printf (len, format, [arg1, ..., argn]) */
-static char *
+char *
attr_printf VPARAMS ((unsigned int len, const char *fmt, ...))
{
char str[256];
Return the new expression, if any. */
-static rtx
+rtx
check_attr_test (exp, is_const, lineno)
rtx exp;
int is_const;
/* Create internal attribute with the given default value. */
-static void
+void
make_internal_attr (name, value, special)
const char *name;
rtx value;
/* Return (attr_value "n") */
-static rtx
+rtx
make_numeric_value (n)
int n;
{
/* Read the machine description. */
+ initiate_automaton_gen (argc, argv);
while (1)
{
int lineno;
gen_unit (desc, lineno);
break;
+ case DEFINE_CPU_UNIT:
+ gen_cpu_unit (desc);
+ break;
+
+ case DEFINE_QUERY_CPU_UNIT:
+ gen_query_cpu_unit (desc);
+ break;
+
+ case DEFINE_BYPASS:
+ gen_bypass (desc);
+ break;
+
+ case EXCLUSION_SET:
+ gen_excl_set (desc);
+ break;
+
+ case PRESENCE_SET:
+ gen_presence_set (desc);
+ break;
+
+ case ABSENCE_SET:
+ gen_absence_set (desc);
+ break;
+
+ case DEFINE_AUTOMATON:
+ gen_automaton (desc);
+ break;
+
+ case AUTOMATA_OPTION:
+ gen_automata_option (desc);
+ break;
+
+ case DEFINE_RESERVATION:
+ gen_reserv (desc);
+ break;
+
+ case DEFINE_INSN_RESERVATION:
+ gen_insn_reserv (desc);
+ break;
+
default:
break;
}
if (num_delays)
expand_delays ();
- /* Expand DEFINE_FUNCTION_UNIT information into new attributes. */
- if (num_units)
- expand_units ();
+ if (num_units || num_dfa_decls)
+ {
+ /* Expand DEFINE_FUNCTION_UNIT information into new attributes. */
+ expand_units ();
+ /* Build DFA, output some functions and expand DFA information
+ into new attributes. */
+ expand_automata ();
+ }
printf ("#include \"config.h\"\n");
printf ("#include \"system.h\"\n");
write_eligible_delay ("annul_false");
}
- /* Write out information about function units. */
- if (num_units)
- write_function_unit_info ();
+ if (num_units || num_dfa_decls)
+ {
+ /* Write out information about function units. */
+ write_function_unit_info ();
+ /* Output code for pipeline hazards recognition based on DFA
+ (deterministic finite state automata). */
+ write_automata ();
+ }
/* Write out constant delay slot info */
write_const_num_delay_slots ();
--- /dev/null
+/* External definitions of source files of genattrtab.
+ Copyright (C) 2001 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+/* Defined in genattrtab.c: */
+extern rtx check_attr_test PARAMS ((rtx, int, int));
+extern rtx make_numeric_value PARAMS ((int));
+extern void make_internal_attr PARAMS ((const char *, rtx, int));
+extern char *attr_printf PARAMS ((unsigned int, const char *, ...))
+ ATTRIBUTE_PRINTF_2;
+
+extern int num_dfa_decls;
+
+/* Defined in genautomata.c: */
+extern void gen_cpu_unit PARAMS ((rtx));
+extern void gen_query_cpu_unit PARAMS ((rtx));
+extern void gen_bypass PARAMS ((rtx));
+extern void gen_excl_set PARAMS ((rtx));
+extern void gen_presence_set PARAMS ((rtx));
+extern void gen_absence_set PARAMS ((rtx));
+extern void gen_automaton PARAMS ((rtx));
+extern void gen_automata_option PARAMS ((rtx));
+extern void gen_reserv PARAMS ((rtx));
+extern void gen_insn_reserv PARAMS ((rtx));
+extern void initiate_automaton_gen PARAMS ((int, char **));
+extern void expand_automata PARAMS ((void));
+extern void write_automata PARAMS ((void));
--- /dev/null
+/* Pipeline hazard description translator.
+ Copyright (C) 2000, 2001 Free Software Foundation, Inc.
+
+ Written by Vladimir Makarov <vmakarov@redhat.com>
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+GNU CC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA. */
+
+/* References:
+
+ 1. Detecting pipeline structural hazards quickly. T. Proebsting,
+ C. Fraser. Proceedings of ACM SIGPLAN-SIGACT Symposium on
+ Principles of Programming Languages, pages 280--286, 1994.
+
+ This article is a good starting point for understanding the usage of
+ finite state automata for pipeline hazard recognizers. But I'd
+ recommend the 2nd article for a deeper understanding.
+
+ 2. Efficient Instruction Scheduling Using Finite State Automata:
+ V. Bala and N. Rubin, Proceedings of MICRO-28. This is the best
+ article about usage of finite state automata for pipeline hazard
+ recognizers.
+
+ The current implementation is different from the 2nd article in the
+ following:
+
+ 1. New operator `|' (alternative) is permitted in functional unit
+ reservation which can be treated deterministically and
+ non-deterministically.
+
+ 2. Possibility of usage of nondeterministic automata too.
+
+ 3. Possibility to query functional unit reservations for given
+ automaton state.
+
+ 4. Several constructions to describe impossible reservations
+ (`exclusion_set', `presence_set', and `absence_set').
+
+ 5. No reverse automata are generated. Trace instruction scheduling
+ requires this. It can be easily added in the future if we
+ really need this.
+
+ 6. Unions of automaton states are not generated yet. It is planned
+ to be implemented. Such a feature is needed to make more accurate
+ interlock insn scheduling to get state describing functional
+ unit reservation in a joint CFG point.
+*/
+
+/* The code in this file processes constructions of the machine
+ description file which describe the automaton used for recognition of
+ processor pipeline hazards by the insn scheduler and which can be used
+ for other tasks (such as VLIW insn packing).
+
+ The translator functions `gen_cpu_unit', `gen_query_cpu_unit',
+ `gen_bypass', `gen_excl_set', `gen_presence_set',
+ `gen_absence_set', `gen_automaton', `gen_automata_option',
+ `gen_reserv', `gen_insn_reserv' are called from file
+ `genattrtab.c'. They transform RTL constructions describing
+ automata in .md file into internal representation convenient for
+ further processing.
+
+ The translator major function `expand_automata' processes the
+ description internal representation into finite state automaton.
+ It can be divided into:
+
+ o checking correctness of the automaton pipeline description
+ (major function is `check_all_description').
+
+ o generating automaton (automata) from the description (major
+ function is `make_automaton').
+
+ o optional transformation of nondeterministic finite state
+ automata into deterministic ones if the alternative operator
+ `|' is treated nondeterministically in the description (major
+ function is `NDFA_to_DFA').
+
+ o optional minimization of the finite state automata by merging
+ equivalent automaton states (major function is `minimize_DFA').
+
+ o forming tables (some as comb vectors) and attributes
+ representing the automata (functions output_..._table).
+
+ Function `write_automata' outputs the created finite state
+ automaton as different tables and functions which work with the
+ automata to inquire automaton state and to change its state. These
+ functions are used by the gcc instruction scheduler and maybe by some
+ other gcc code. */
+
+#include "hconfig.h"
+#include "system.h"
+#include "rtl.h"
+#include "obstack.h"
+#include "errors.h"
+
+#include <ctype.h>
+#include <math.h>
+#include "hashtab.h"
+#include "varray.h"
+
+#ifdef HAVE_LIMITS_H
+#include <limits.h>
+#else
+#ifndef CHAR_BIT
+#define CHAR_BIT 8
+#endif
+#endif
+
+#include "genattrtab.h"
+
+#define obstack_chunk_alloc xmalloc
+#define obstack_chunk_free free
+
+/* Positions in machine description file. Now they are not used. But
+ they could be used in the future for better diagnostic messages. */
+typedef int pos_t;
+
+/* The following is element of vector of current (and planned in the
+ future) functional unit reservations. */
+typedef unsigned HOST_WIDE_INT set_el_t;
+
+/* Reservations of function units are represented by value of the following
+ type. */
+typedef set_el_t *reserv_sets_t;
+
+/* The following structure represents variable length array (vla) of
+ pointers and HOST WIDE INTs. We could use only varray. But we
+ add a new layer because we add elements very frequently and this could
+ stress the OS allocator if only varray were used. */
+typedef struct {
+ size_t length; /* current size of vla. */
+ varray_type varray; /* container for vla. */
+} vla_ptr_t;
+
+typedef vla_ptr_t vla_hwint_t;
+
+/* The following structure describes a ticker. */
+struct ticker
+{
+ /* The following member value is the ticker creation time adjusted
+ for the time when the ticker was off. Active time of the ticker
+ is the current time minus this value. */
+ int modified_creation_time;
+ /* The following member value is time (incremented by one) when the
+ ticker was off. Zero value means that now the ticker is on. */
+ int incremented_off_time;
+};
+
+/* The ticker is represented by the following type. */
+typedef struct ticker ticker_t;
+
+/* The following type describes elements of output vectors. */
+typedef HOST_WIDE_INT vect_el_t;
+
+/* Forward declaration of structures of internal representation of
+ pipeline description based on NDFA. */
+
+struct unit_decl;
+struct bypass_decl;
+struct result_decl;
+struct automaton_decl;
+struct unit_rel_decl;
+struct reserv_decl;
+struct insn_reserv_decl;
+struct decl;
+struct unit_regexp;
+struct result_regexp;
+struct reserv_regexp;
+struct nothing_regexp;
+struct sequence_regexp;
+struct repeat_regexp;
+struct allof_regexp;
+struct oneof_regexp;
+struct regexp;
+struct description;
+struct unit_set_el;
+struct state;
+struct alt_state;
+struct arc;
+struct ainsn;
+struct automaton;
+struct state_ainsn_table;
+
+/* The following typedefs are for brevity. */
+typedef struct decl *decl_t;
+typedef struct regexp *regexp_t;
+typedef struct unit_set_el *unit_set_el_t;
+typedef struct alt_state *alt_state_t;
+typedef struct state *state_t;
+typedef struct arc *arc_t;
+typedef struct ainsn *ainsn_t;
+typedef struct automaton *automaton_t;
+typedef struct automata_list_el *automata_list_el_t;
+typedef struct state_ainsn_table *state_ainsn_table_t;
+
+
+/* Prototypes of functions gen_cpu_unit, gen_query_cpu_unit,
+ gen_bypass, gen_excl_set, gen_presence_set, gen_absence_set,
+ gen_automaton, gen_automata_option, gen_reserv, gen_insn_reserv,
+ initiate_automaton_gen, expand_automata, write_automata are
+ declared in `genattrtab.h' (included above) because the functions
+ are called from function `main'. */
+
+static void *create_node PARAMS ((size_t));
+static void *copy_node PARAMS ((void *, size_t));
+static char *check_name PARAMS ((char *, pos_t));
+static char *next_sep_el PARAMS ((char **, int, int));
+static int n_sep_els PARAMS ((char *, int, int));
+static char **get_str_vect PARAMS ((char *, int *, int, int));
+static regexp_t gen_regexp_el PARAMS ((char *));
+static regexp_t gen_regexp_repeat PARAMS ((char *));
+static regexp_t gen_regexp_allof PARAMS ((char *));
+static regexp_t gen_regexp_oneof PARAMS ((char *));
+static regexp_t gen_regexp_sequence PARAMS ((char *));
+static regexp_t gen_regexp PARAMS ((char *));
+
+static unsigned string_hash PARAMS ((const char *));
+static unsigned automaton_decl_hash PARAMS ((const void *));
+static int automaton_decl_eq_p PARAMS ((const void *,
+ const void *));
+static decl_t insert_automaton_decl PARAMS ((decl_t));
+static decl_t find_automaton_decl PARAMS ((char *));
+static void initiate_automaton_decl_table PARAMS ((void));
+static void finish_automaton_decl_table PARAMS ((void));
+
+static unsigned insn_decl_hash PARAMS ((const void *));
+static int insn_decl_eq_p PARAMS ((const void *,
+ const void *));
+static decl_t insert_insn_decl PARAMS ((decl_t));
+static decl_t find_insn_decl PARAMS ((char *));
+static void initiate_insn_decl_table PARAMS ((void));
+static void finish_insn_decl_table PARAMS ((void));
+
+static unsigned decl_hash PARAMS ((const void *));
+static int decl_eq_p PARAMS ((const void *,
+ const void *));
+static decl_t insert_decl PARAMS ((decl_t));
+static decl_t find_decl PARAMS ((char *));
+static void initiate_decl_table PARAMS ((void));
+static void finish_decl_table PARAMS ((void));
+
+static unit_set_el_t process_excls PARAMS ((char **, int, pos_t));
+static void add_excls PARAMS ((unit_set_el_t, unit_set_el_t,
+ pos_t));
+static unit_set_el_t process_presence_absence
+ PARAMS ((char **, int, pos_t, int));
+static void add_presence_absence PARAMS ((unit_set_el_t, unit_set_el_t,
+ pos_t, int));
+static void process_decls PARAMS ((void));
+static struct bypass_decl *find_bypass PARAMS ((struct bypass_decl *,
+ struct insn_reserv_decl *));
+static void check_automaton_usage PARAMS ((void));
+static regexp_t process_regexp PARAMS ((regexp_t));
+static void process_regexp_decls PARAMS ((void));
+static void check_usage PARAMS ((void));
+static int loop_in_regexp PARAMS ((regexp_t, decl_t));
+static void check_loops_in_regexps PARAMS ((void));
+static int process_regexp_cycles PARAMS ((regexp_t, int));
+static void evaluate_max_reserv_cycles PARAMS ((void));
+static void check_all_description PARAMS ((void));
+
+static ticker_t create_ticker PARAMS ((void));
+static void ticker_off PARAMS ((ticker_t *));
+static void ticker_on PARAMS ((ticker_t *));
+static int active_time PARAMS ((ticker_t));
+static void print_active_time PARAMS ((FILE *, ticker_t));
+
+static void add_advance_cycle_insn_decl PARAMS ((void));
+
+static alt_state_t get_free_alt_state PARAMS ((void));
+static void free_alt_state PARAMS ((alt_state_t));
+static void free_alt_states PARAMS ((alt_state_t));
+static int alt_state_cmp PARAMS ((const void *alt_state_ptr_1,
+ const void *alt_state_ptr_2));
+static alt_state_t uniq_sort_alt_states PARAMS ((alt_state_t));
+static int alt_states_eq PARAMS ((alt_state_t, alt_state_t));
+static void initiate_alt_states PARAMS ((void));
+static void finish_alt_states PARAMS ((void));
+
+static reserv_sets_t alloc_empty_reserv_sets PARAMS ((void));
+static unsigned reserv_sets_hash_value PARAMS ((reserv_sets_t));
+static int reserv_sets_cmp PARAMS ((reserv_sets_t, reserv_sets_t));
+static int reserv_sets_eq PARAMS ((reserv_sets_t, reserv_sets_t));
+static void set_unit_reserv PARAMS ((reserv_sets_t, int, int));
+static int test_unit_reserv PARAMS ((reserv_sets_t, int, int));
+static int it_is_empty_reserv_sets PARAMS ((reserv_sets_t))
+ ATTRIBUTE_UNUSED;
+static int reserv_sets_are_intersected PARAMS ((reserv_sets_t, reserv_sets_t));
+static void reserv_sets_shift PARAMS ((reserv_sets_t, reserv_sets_t));
+static void reserv_sets_or PARAMS ((reserv_sets_t, reserv_sets_t,
+ reserv_sets_t));
+static void reserv_sets_and PARAMS ((reserv_sets_t, reserv_sets_t,
+ reserv_sets_t))
+ ATTRIBUTE_UNUSED;
+static void output_cycle_reservs PARAMS ((FILE *, reserv_sets_t,
+ int, int));
+static void output_reserv_sets PARAMS ((FILE *, reserv_sets_t));
+static state_t get_free_state PARAMS ((int, automaton_t));
+static void free_state PARAMS ((state_t));
+static unsigned state_hash PARAMS ((const void *));
+static int state_eq_p PARAMS ((const void *, const void *));
+static state_t insert_state PARAMS ((state_t));
+static void set_state_reserv PARAMS ((state_t, int, int));
+static int intersected_state_reservs_p PARAMS ((state_t, state_t));
+static state_t states_union PARAMS ((state_t, state_t));
+static state_t state_shift PARAMS ((state_t));
+static void initiate_states PARAMS ((void));
+static void finish_states PARAMS ((void));
+
+static void free_arc PARAMS ((arc_t));
+static void remove_arc PARAMS ((state_t, arc_t));
+static arc_t find_arc PARAMS ((state_t, state_t, ainsn_t));
+static arc_t add_arc PARAMS ((state_t, state_t, ainsn_t, int));
+static arc_t first_out_arc PARAMS ((state_t));
+static arc_t next_out_arc PARAMS ((arc_t));
+static void initiate_arcs PARAMS ((void));
+static void finish_arcs PARAMS ((void));
+
+static automata_list_el_t get_free_automata_list_el PARAMS ((void));
+static void free_automata_list_el PARAMS ((automata_list_el_t));
+static void free_automata_list PARAMS ((automata_list_el_t));
+static unsigned automata_list_hash PARAMS ((const void *));
+static int automata_list_eq_p PARAMS ((const void *, const void *));
+static void initiate_automata_lists PARAMS ((void));
+static void automata_list_start PARAMS ((void));
+static void automata_list_add PARAMS ((automaton_t));
+static automata_list_el_t automata_list_finish PARAMS ((void));
+static void finish_automata_lists PARAMS ((void));
+
+static void initiate_excl_sets PARAMS ((void));
+static reserv_sets_t get_excl_set PARAMS ((reserv_sets_t));
+
+static void initiate_presence_absence_sets PARAMS ((void));
+static reserv_sets_t get_presence_absence_set PARAMS ((reserv_sets_t, int));
+
+static regexp_t copy_insn_regexp PARAMS ((regexp_t));
+static regexp_t transform_1 PARAMS ((regexp_t));
+static regexp_t transform_2 PARAMS ((regexp_t));
+static regexp_t transform_3 PARAMS ((regexp_t));
+static regexp_t regexp_transform_func
+ PARAMS ((regexp_t, regexp_t (*) (regexp_t)));
+static regexp_t transform_regexp PARAMS ((regexp_t));
+static void transform_insn_regexps PARAMS ((void));
+
+static int process_seq_for_forming_states PARAMS ((regexp_t, automaton_t,
+ int));
+static void finish_forming_alt_state PARAMS ((alt_state_t,
+ automaton_t));
+static void process_alts_for_forming_states PARAMS ((regexp_t,
+ automaton_t, int));
+static void create_alt_states PARAMS ((automaton_t));
+
+static void form_ainsn_with_same_reservs PARAMS ((automaton_t));
+
+static void make_automaton PARAMS ((automaton_t));
+static void form_arcs_marked_by_insn PARAMS ((state_t));
+static void create_composed_state PARAMS ((state_t, arc_t, vla_ptr_t *));
+static void NDFA_to_DFA PARAMS ((automaton_t));
+static void pass_state_graph PARAMS ((state_t, void (*) (state_t)));
+static void pass_states PARAMS ((automaton_t,
+ void (*) (state_t)));
+static void initiate_pass_states PARAMS ((void));
+static void add_achieved_state PARAMS ((state_t));
+static int set_out_arc_insns_equiv_num PARAMS ((state_t, int));
+static void clear_arc_insns_equiv_num PARAMS ((state_t));
+static void copy_equiv_class PARAMS ((vla_ptr_t *to,
+ const vla_ptr_t *from));
+static int state_is_differed PARAMS ((state_t, int, int));
+static state_t init_equiv_class PARAMS ((state_t *states, int));
+static int partition_equiv_class PARAMS ((state_t *, int,
+ vla_ptr_t *, int *));
+static void evaluate_equiv_classes PARAMS ((automaton_t, vla_ptr_t *));
+static void merge_states PARAMS ((automaton_t, vla_ptr_t *));
+static void set_new_cycle_flags PARAMS ((state_t));
+static void minimize_DFA PARAMS ((automaton_t));
+static void incr_states_and_arcs_nums PARAMS ((state_t));
+static void count_states_and_arcs PARAMS ((automaton_t, int *, int *));
+static void build_automaton PARAMS ((automaton_t));
+
+static void set_order_state_num PARAMS ((state_t));
+static void enumerate_states PARAMS ((automaton_t));
+
+static ainsn_t insert_ainsn_into_equiv_class PARAMS ((ainsn_t, ainsn_t));
+static void delete_ainsn_from_equiv_class PARAMS ((ainsn_t));
+static void process_insn_equiv_class PARAMS ((ainsn_t, arc_t *));
+static void process_state_for_insn_equiv_partition PARAMS ((state_t));
+static void set_insn_equiv_classes PARAMS ((automaton_t));
+
+static double estimate_one_automaton_bound PARAMS ((void));
+static int compare_max_occ_cycle_nums PARAMS ((const void *,
+ const void *));
+static void units_to_automata_heuristic_distr PARAMS ((void));
+static ainsn_t create_ainsns PARAMS ((void));
+static void units_to_automata_distr PARAMS ((void));
+static void create_automata PARAMS ((void));
+
+static void form_regexp PARAMS ((regexp_t));
+static const char *regexp_representation PARAMS ((regexp_t));
+static void finish_regexp_representation PARAMS ((void));
+
+static void output_range_type PARAMS ((FILE *, long int, long int));
+static int longest_path_length PARAMS ((state_t));
+static void process_state_longest_path_length PARAMS ((state_t));
+static void output_dfa_max_issue_rate PARAMS ((void));
+static void output_vect PARAMS ((vect_el_t *, int));
+static void output_chip_member_name PARAMS ((FILE *, automaton_t));
+static void output_temp_chip_member_name PARAMS ((FILE *, automaton_t));
+static void output_translate_vect_name PARAMS ((FILE *, automaton_t));
+static void output_trans_full_vect_name PARAMS ((FILE *, automaton_t));
+static void output_trans_comb_vect_name PARAMS ((FILE *, automaton_t));
+static void output_trans_check_vect_name PARAMS ((FILE *, automaton_t));
+static void output_trans_base_vect_name PARAMS ((FILE *, automaton_t));
+static void output_state_alts_full_vect_name PARAMS ((FILE *, automaton_t));
+static void output_state_alts_comb_vect_name PARAMS ((FILE *, automaton_t));
+static void output_state_alts_check_vect_name PARAMS ((FILE *, automaton_t));
+static void output_state_alts_base_vect_name PARAMS ((FILE *, automaton_t));
+static void output_min_issue_delay_vect_name PARAMS ((FILE *, automaton_t));
+static void output_dead_lock_vect_name PARAMS ((FILE *, automaton_t));
+static void output_reserved_units_table_name PARAMS ((FILE *, automaton_t));
+static void output_state_member_type PARAMS ((FILE *, automaton_t));
+static void output_chip_definitions PARAMS ((void));
+static void output_translate_vect PARAMS ((automaton_t));
+static int comb_vect_p PARAMS ((state_ainsn_table_t));
+static state_ainsn_table_t create_state_ainsn_table PARAMS ((automaton_t));
+static void output_state_ainsn_table
+ PARAMS ((state_ainsn_table_t, char *, void (*) (FILE *, automaton_t),
+ void (*) (FILE *, automaton_t), void (*) (FILE *, automaton_t),
+ void (*) (FILE *, automaton_t)));
+static void add_vect PARAMS ((state_ainsn_table_t,
+ int, vect_el_t *, int));
+static int out_state_arcs_num PARAMS ((state_t));
+static int compare_transition_els_num PARAMS ((const void *, const void *));
+static void add_vect_el PARAMS ((vla_hwint_t *,
+ ainsn_t, int));
+static void add_states_vect_el PARAMS ((state_t));
+static void output_trans_table PARAMS ((automaton_t));
+static void output_state_alts_table PARAMS ((automaton_t));
+static void min_issue_delay_pass_states PARAMS ((state_t, ainsn_t));
+static int min_issue_delay PARAMS ((state_t, ainsn_t));
+static void initiate_min_issue_delay_pass_states PARAMS ((void));
+static void output_min_issue_delay_table PARAMS ((automaton_t));
+static void output_dead_lock_vect PARAMS ((automaton_t));
+static void output_reserved_units_table PARAMS ((automaton_t));
+static void output_tables PARAMS ((void));
+static void output_max_insn_queue_index_def PARAMS ((void));
+static void output_insn_code_cases PARAMS ((void (*) (automata_list_el_t)));
+static void output_automata_list_min_issue_delay_code PARAMS ((automata_list_el_t));
+static void output_internal_min_issue_delay_func PARAMS ((void));
+static void output_automata_list_transition_code PARAMS ((automata_list_el_t));
+static void output_internal_trans_func PARAMS ((void));
+static void output_internal_insn_code_evaluation PARAMS ((const char *,
+ const char *, int));
+static void output_dfa_insn_code_func PARAMS ((void));
+static void output_trans_func PARAMS ((void));
+static void output_automata_list_state_alts_code PARAMS ((automata_list_el_t));
+static void output_internal_state_alts_func PARAMS ((void));
+static void output_state_alts_func PARAMS ((void));
+static void output_min_issue_delay_func PARAMS ((void));
+static void output_internal_dead_lock_func PARAMS ((void));
+static void output_dead_lock_func PARAMS ((void));
+static void output_internal_reset_func PARAMS ((void));
+static void output_size_func PARAMS ((void));
+static void output_reset_func PARAMS ((void));
+static void output_min_insn_conflict_delay_func PARAMS ((void));
+static void output_internal_insn_latency_func PARAMS ((void));
+static void output_insn_latency_func PARAMS ((void));
+static void output_print_reservation_func PARAMS ((void));
+static int units_cmp PARAMS ((const void *,
+ const void *));
+static void output_get_cpu_unit_code_func PARAMS ((void));
+static void output_cpu_unit_reservation_p PARAMS ((void));
+static void output_dfa_start_func PARAMS ((void));
+static void output_dfa_finish_func PARAMS ((void));
+
+static void output_regexp PARAMS ((regexp_t ));
+static void output_unit_set_el_list PARAMS ((unit_set_el_t));
+static void output_description PARAMS ((void));
+static void output_automaton_name PARAMS ((FILE *, automaton_t));
+static void output_automaton_units PARAMS ((automaton_t));
+static void add_state_reservs PARAMS ((state_t));
+static void output_state_arcs PARAMS ((state_t));
+static int state_reservs_cmp PARAMS ((const void *,
+ const void *));
+static void remove_state_duplicate_reservs PARAMS ((void));
+static void output_state PARAMS ((state_t));
+static void output_automaton_descriptions PARAMS ((void));
+static void output_statistics PARAMS ((FILE *));
+static void output_time_statistics PARAMS ((FILE *));
+static void generate PARAMS ((void));
+
+static void make_insn_alts_attr PARAMS ((void));
+static void make_internal_dfa_insn_code_attr PARAMS ((void));
+static void make_default_insn_latency_attr PARAMS ((void));
+static void make_bypass_attr PARAMS ((void));
+static const char *file_name_suffix PARAMS ((const char *));
+static const char *base_file_name PARAMS ((const char *));
+static void check_automata PARAMS ((void));
+static void add_automaton_state PARAMS ((state_t));
+static void form_important_insn_automata_lists PARAMS ((void));
+
+/* Undefined position. */
+static pos_t no_pos = 0;
+
+/* All IR is stored in the following obstack. */
+static struct obstack irp;
+
+\f
+
+/* This page contains code for working with variable length arrays
+ (vla) of pointers. We could use varray alone, but we add a new
+ layer because we add elements very frequently and this could stress
+ the OS allocator if only varray were used. */
+
+/* Start work with vla. */
+#define VLA_PTR_CREATE(vla, allocated_length, name) \
+ do \
+ { \
+ vla_ptr_t *vla_ptr = &(vla); \
+ \
+ VARRAY_GENERIC_PTR_INIT (vla_ptr->varray, allocated_length, name);\
+ vla_ptr->length = 0; \
+ } \
+ while (0)
+
+/* Finish work with the vla. */
+#define VLA_PTR_DELETE(vla) VARRAY_FREE ((vla).varray)
+
+/* Return start address of the vla. */
+#define VLA_PTR_BEGIN(vla) ((void *) &VARRAY_GENERIC_PTR ((vla).varray, 0))
+
+/* Address of the last element of the vla. Do not use side effects in
+ the macro argument. */
+#define VLA_PTR_LAST(vla) (&VARRAY_GENERIC_PTR ((vla).varray, \
+ (vla).length - 1))
+/* Nullify the vla. */
+#define VLA_PTR_NULLIFY(vla) ((vla).length = 0)
+
+/* Shorten the vla by N elements. */
+#define VLA_PTR_SHORTEN(vla, n) ((vla).length -= (n))
+
+/* Expand the vla by N elements. The values of the new elements are
+ undefined. */
+#define VLA_PTR_EXPAND(vla, n) \
+ do { \
+ vla_ptr_t *expand_vla_ptr = &(vla); \
+ size_t new_length = (n) + expand_vla_ptr->length; \
+ \
+ if (VARRAY_SIZE (expand_vla_ptr->varray) < new_length) \
+ VARRAY_GROW (expand_vla_ptr->varray, \
+ (new_length - expand_vla_ptr->length < 128 \
+ ? expand_vla_ptr->length + 128 : new_length)); \
+ expand_vla_ptr->length = new_length; \
+ } while (0)
+
+/* Add element to the end of the vla. */
+#define VLA_PTR_ADD(vla, ptr) \
+ do { \
+ vla_ptr_t *vla_ptr = &(vla); \
+ \
+ VLA_PTR_EXPAND (*vla_ptr, 1); \
+ VARRAY_GENERIC_PTR (vla_ptr->varray, vla_ptr->length - 1) = (ptr);\
+ } while (0)
+
+/* Length of the vla in elements. */
+#define VLA_PTR_LENGTH(vla) ((vla).length)
+
+/* N-th element of the vla. */
+#define VLA_PTR(vla, n) VARRAY_GENERIC_PTR ((vla).varray, n)
+
+
+/* The following macros are analogous to the previous ones but for
+ VLAs of HOST WIDE INTs. */
+
+#define VLA_HWINT_CREATE(vla, allocated_length, name) \
+ do { \
+ vla_hwint_t *vla_ptr = &(vla); \
+ \
+ VARRAY_WIDE_INT_INIT (vla_ptr->varray, allocated_length, name); \
+ vla_ptr->length = 0; \
+ } while (0)
+
+#define VLA_HWINT_DELETE(vla) VARRAY_FREE ((vla).varray)
+
+#define VLA_HWINT_BEGIN(vla) (&VARRAY_WIDE_INT ((vla).varray, 0))
+
+/* Do not use side effects in the macro argument. */
+#define VLA_HWINT_LAST(vla) (&VARRAY_WIDE_INT ((vla).varray, \
+ (vla).length - 1))
+
+#define VLA_HWINT_NULLIFY(vla) ((vla).length = 0)
+
+#define VLA_HWINT_SHORTEN(vla, n) ((vla).length -= (n))
+
+#define VLA_HWINT_EXPAND(vla, n) \
+ do { \
+ vla_hwint_t *expand_vla_ptr = &(vla); \
+ size_t new_length = (n) + expand_vla_ptr->length; \
+ \
+ if (VARRAY_SIZE (expand_vla_ptr->varray) < new_length) \
+ VARRAY_GROW (expand_vla_ptr->varray, \
+ (new_length - expand_vla_ptr->length < 128 \
+ ? expand_vla_ptr->length + 128 : new_length)); \
+ expand_vla_ptr->length = new_length; \
+ } while (0)
+
+#define VLA_HWINT_ADD(vla, ptr) \
+ do { \
+ vla_hwint_t *vla_ptr = &(vla); \
+ \
+ VLA_HWINT_EXPAND (*vla_ptr, 1); \
+ VARRAY_WIDE_INT (vla_ptr->varray, vla_ptr->length - 1) = (ptr); \
+ } while (0)
+
+#define VLA_HWINT_LENGTH(vla) ((vla).length)
+
+#define VLA_HWINT(vla, n) VARRAY_WIDE_INT ((vla).varray, n)
+
+\f
+
+/* Options with the following names can be set up in automata_option
+ construction. Because the strings occur more than once, we use
+ macros. */
+
+#define NO_MINIMIZATION_OPTION "-no-minimization"
+
+#define W_OPTION "-w"
+
+#define NDFA_OPTION "-ndfa"
+
+/* The following flags are set up by function `initiate_automaton_gen'. */
+
+/* Make automata with nondeterministic reservation by insns (`-ndfa'). */
+static int ndfa_flag;
+
+/* Do not make minimization of DFA (`-no-minimization'). */
+static int no_minimization_flag;
+
+/* Value of this variable is the number of automata being generated.
+ The actual number of automata may be less than this value if there
+ is not a sufficient number of units. This value is defined by the
+ argument of option `-split', or by automaton constructions if the
+ value is zero (the default value of the argument). */
+static int split_argument;
+
+/* Flag of output time statistics (`-time'). */
+static int time_flag;
+
+/* Flag of creation of description file which contains description of
+ result automaton and statistics information (`-v'). */
+static int v_flag;
+
+/* Flag of generating warning instead of error for non-critical errors
+ (`-w'). */
+static int w_flag;
+
+
+/* Output file for pipeline hazard recognizer (PHR) being generated.
+ The value is NULL if the file is not defined. */
+static FILE *output_file;
+
+/* Description file of PHR. The value is NULL if the file is not
+ created. */
+static FILE *output_description_file;
+
+/* PHR description file name. */
+static char *output_description_file_name;
+
+/* Value of the following variable is node representing description
+ being processed. This is start point of IR. */
+static struct description *description;
+
+\f
+
+/* This page contains description of IR structure (nodes). */
+
+enum decl_mode
+{
+ dm_unit,
+ dm_bypass,
+ dm_automaton,
+ dm_excl,
+ dm_presence,
+ dm_absence,
+ dm_reserv,
+ dm_insn_reserv
+};
+
+/* This describes define_cpu_unit and define_query_cpu_unit (see file
+ rtl.def). */
+struct unit_decl
+{
+ char *name;
+ /* NULL if the automaton name is absent. */
+ char *automaton_name;
+ /* If the following value is not zero, the cpu unit reservation is
+ described in define_query_cpu_unit. */
+ char query_p;
+
+ /* The following fields are defined by checker. */
+
+ /* The following field value is nonzero if the unit is used in a
+ regexp. */
+ char unit_is_used;
+ /* The following field value is order number (0, 1, ...) of given
+ unit. */
+ int unit_num;
+ /* The following field value is corresponding declaration of
+ automaton which was given in description. If the field value is
+ NULL then automaton in the unit declaration was absent. */
+ struct automaton_decl *automaton_decl;
+ /* The following field value is maximal cycle number (1, ...) on
+ which given unit occurs in insns. Zero value means that given
+ unit is not used in insns. */
+ int max_occ_cycle_num;
+ /* The following list contains units which conflict with given
+ unit. */
+ unit_set_el_t excl_list;
+ /* The following list contains units which are required for the
+ reservation of given unit. */
+ unit_set_el_t presence_list;
+ /* The following list contains units which should not be present in
+ reservation for given unit. */
+ unit_set_el_t absence_list;
+ /* The following is used only when `query_p' has nonzero value.
+ This is query number for the unit. */
+ int query_num;
+
+ /* The following fields are defined by automaton generator. */
+
+ /* The following field value is number of the automaton to which
+ given unit belongs. */
+ int corresponding_automaton_num;
+};
+
+/* This describes define_bypass (see file rtl.def). */
+struct bypass_decl
+{
+ int latency;
+ char *out_insn_name;
+ char *in_insn_name;
+ char *bypass_guard_name;
+
+ /* The following fields are defined by checker. */
+
+ /* output and input insns of given bypass. */
+ struct insn_reserv_decl *out_insn_reserv;
+ struct insn_reserv_decl *in_insn_reserv;
+ /* The next bypass for given output insn. */
+ struct bypass_decl *next;
+};
+
+/* This describes define_automaton (see file rtl.def). */
+struct automaton_decl
+{
+ char *name;
+
+ /* The following fields are defined by automaton generator. */
+
+ /* The following field value is nonzero if the automaton is used in
+ an regexp definition. */
+ char automaton_is_used;
+
+ /* The following fields are defined by checker. */
+
+ /* The following field value is the corresponding automaton. This
+ field is not NULL only if the automaton is present in unit
+ declarations and the automatic partition on automata is not
+ used. */
+ automaton_t corresponding_automaton;
+};
+
+/* This describes unit relations: exclusion_set, presence_set, or
+ absence_set (see file rtl.def). */
+struct unit_rel_decl
+{
+ int names_num;
+ int first_list_length;
+ char *names [1];
+};
+
+/* This describes define_reservation (see file rtl.def). */
+struct reserv_decl
+{
+ char *name;
+ regexp_t regexp;
+
+ /* The following fields are defined by checker. */
+
+ /* The following field value is nonzero if the reservation is used in
+ a regexp. */
+ char reserv_is_used;
+ /* The following field is used to check for a cycle in expression
+ definition. */
+ int loop_pass_num;
+};
+
+/* This describes define_insn_reservation (see file rtl.def). */
+struct insn_reserv_decl
+{
+ rtx condexp;
+ int default_latency;
+ regexp_t regexp;
+ char *name;
+
+ /* The following fields are defined by checker. */
+
+ /* The following field value is order number (0, 1, ...) of given
+ insn. */
+ int insn_num;
+ /* The following field value is list of bypasses in which given insn
+ is output insn. */
+ struct bypass_decl *bypass_list;
+
+ /* The following fields are defined by automaton generator. */
+
+ /* The following field is the insn regexp transformed so that
+ the regexp has no optional regexp, repetition regexp, or
+ reservation name (i.e. reservation identifiers are replaced by the
+ corresponding regexp) and all alternations are at the topmost level
+ of the regexp. The value can be NULL only if it is special
+ insn `cycle advancing'. */
+ regexp_t transformed_regexp;
+ /* The following field value is list of arcs marked by given
+ insn. The field is used in transformation NDFA -> DFA. */
+ arc_t arcs_marked_by_insn;
+ /* The two following fields are used during minimization of a finite state
+ automaton. */
+ /* The field value is number of equivalence class of state into
+ which arc marked by given insn enters from a state (fixed during
+ an automaton minimization). */
+ int equiv_class_num;
+ /* The field value is state_alts of the arc leaving a state (fixed
+ during an automaton minimization) and marked by given
+ insn. */
+ int state_alts;
+ /* The following member value is the list of automata which can be
+ changed by the insn issue. */
+ automata_list_el_t important_automata_list;
+ /* The following member is used to process insn once for output. */
+ int processed_p;
+};
+
+/* This contains a declaration mentioned above. */
+struct decl
+{
+ /* What node in the union? */
+ enum decl_mode mode;
+ pos_t pos;
+ union
+ {
+ struct unit_decl unit;
+ struct bypass_decl bypass;
+ struct automaton_decl automaton;
+ struct unit_rel_decl excl;
+ struct unit_rel_decl presence;
+ struct unit_rel_decl absence;
+ struct reserv_decl reserv;
+ struct insn_reserv_decl insn_reserv;
+ } decl;
+};
+
+/* The following structures represent parsed reservation strings. */
+enum regexp_mode
+{
+ rm_unit,
+ rm_reserv,
+ rm_nothing,
+ rm_sequence,
+ rm_repeat,
+ rm_allof,
+ rm_oneof
+};
+
+/* Cpu unit in reservation. */
+struct unit_regexp
+{
+ char *name;
+ struct unit_decl *unit_decl;
+};
+
+/* Define_reservation in a reservation. */
+struct reserv_regexp
+{
+ char *name;
+ struct reserv_decl *reserv_decl;
+};
+
+/* Absence of reservation (represented by string `nothing'). */
+struct nothing_regexp
+{
+ /* This used to be empty but ISO C doesn't allow that. */
+ char unused;
+};
+
+/* Representation of reservations separated by ',' (see file
+ rtl.def). */
+struct sequence_regexp
+{
+ int regexps_num;
+ regexp_t regexps [1];
+};
+
+/* Representation of construction `repeat' (see file rtl.def). */
+struct repeat_regexp
+{
+ int repeat_num;
+ regexp_t regexp;
+};
+
+/* Representation of reservations separated by '+' (see file
+ rtl.def). */
+struct allof_regexp
+{
+ int regexps_num;
+ regexp_t regexps [1];
+};
+
+/* Representation of reservations separated by '|' (see file
+ rtl.def). */
+struct oneof_regexp
+{
+ int regexps_num;
+ regexp_t regexps [1];
+};
+
+/* Representation of a reservation string. */
+struct regexp
+{
+ /* What node in the union? */
+ enum regexp_mode mode;
+ pos_t pos;
+ union
+ {
+ struct unit_regexp unit;
+ struct reserv_regexp reserv;
+ struct nothing_regexp nothing;
+ struct sequence_regexp sequence;
+ struct repeat_regexp repeat;
+ struct allof_regexp allof;
+ struct oneof_regexp oneof;
+ } regexp;
+};
+
+/* Represents a pipeline hazard description based on
+ NDFA. */
+struct description
+{
+ int decls_num;
+
+ /* The following fields are defined by checker. */
+
+ /* The following fields values are correspondingly number of all
+ units, query units, and insns in the description. */
+ int units_num;
+ int query_units_num;
+ int insns_num;
+ /* The following field value is max length (in cycles) of
+ reservations of insns. The field value is defined only for
+ correct programs. */
+ int max_insn_reserv_cycles;
+
+ /* The following fields are defined by automaton generator. */
+
+ /* The following field value is the first automaton. */
+ automaton_t first_automaton;
+
+ /* The following field is created by pipeline hazard parser and
+ contains all declarations. We allocate additional entry for
+ special insn "cycle advancing" which is added by the automaton
+ generator. */
+ decl_t decls [1];
+};
+
+
+
+/* The following nodes are created in automaton checker. */
+
+/* The following nodes represent exclusion, presence, and absence sets
+ for cpu units. Each element is accessed through only one excl_list,
+ presence_list, or absence_list. */
+struct unit_set_el
+{
+ struct unit_decl *unit_decl;
+ unit_set_el_t next_unit_set_el;
+};
+
+
+
+/* The following nodes are created in automaton generator. */
+
+/* The following node type describes a state of an automaton. The state
+ may be deterministic or non-deterministic. A non-deterministic state
+ has several component states which represent alternative cpu units
+ reservations. The state also is used for describing a
+ deterministic reservation of automaton insn. */
+struct state
+{
+ /* The following member value is nonzero if there is a transition by
+ cycle advancing. */
+ int new_cycle_p;
+ /* The following field is list of processor unit reservations on
+ each cycle. */
+ reserv_sets_t reservs;
+ /* The following field is unique number of given state among other
+ states. */
+ int unique_num;
+ /* The following field value is automaton to which given state
+ belongs. */
+ automaton_t automaton;
+ /* The following field value is the first arc output from given
+ state. */
+ arc_t first_out_arc;
+ /* The following field is used to form NDFA. */
+ char it_was_placed_in_stack_for_NDFA_forming;
+ /* The following field is used to form DFA. */
+ char it_was_placed_in_stack_for_DFA_forming;
+ /* The following field is used to transform NDFA to DFA. The value
+ is not NULL if the state is a compound state; in this case field
+ `unit_sets_list' is NULL (NOTE(review): no such member is visible
+ here -- presumably `reservs'; confirm). All states in the list are
+ in the hash table; it is linked through `next_sorted_alt_state'. */
+ alt_state_t component_states;
+ /* The following field is used for passing graph of states. */
+ int pass_num;
+ /* The list of states belonging to one equivalence class is formed
+ with the aid of the following field. */
+ state_t next_equiv_class_state;
+ /* The two following fields are used during minimization of a finite
+ state automaton. */
+ int equiv_class_num_1, equiv_class_num_2;
+ /* The following field is used during minimization of a finite state
+ automaton. The field value is state corresponding to equivalence
+ class to which given state belongs. */
+ state_t equiv_class_state;
+ /* The following field value is the order number of given state.
+ The states in final DFA are enumerated with the aid of the
+ following field. */
+ int order_state_num;
+ /* This member is used for passing states for searching minimal
+ delay time. */
+ int state_pass_num;
+ /* The following member is used to evaluate min issue delay of insn
+ for a state. */
+ int min_insn_issue_delay;
+ /* The following member is used to evaluate max issue rate of the
+ processor. The value of the member is maximal length of the path
+ from given state not containing arcs marked by special insn `cycle
+ advancing'. */
+ int longest_path_length;
+};
+
+/* The following macro is an initial value of member
+ `longest_path_length' of a state. */
+#define UNDEFINED_LONGEST_PATH_LENGTH -1
+
+/* Automaton arc. */
+struct arc
+{
+ /* The following field refers to the state into which given arc
+ enters. */
+ state_t to_state;
+ /* The following field describes that the insn issue (with cycle
+ advancing for special insn `cycle advancing' and without cycle
+ advancing for others) makes transition from given state to
+ another given state. */
+ ainsn_t insn;
+ /* The following field value is the next arc output from the same
+ state. */
+ arc_t next_out_arc;
+ /* List of arcs marked by given insn is formed with the following
+ field. The field is used in transformation NDFA -> DFA. */
+ arc_t next_arc_marked_by_insn;
+ /* The following field is defined if NDFA_FLAG is zero. The member
+ value is number of alternative reservations which can be used for
+ transition for given state by given insn. */
+ int state_alts;
+};
+
+/* The following node type describes a deterministic alternative in
+ non-deterministic state which characterizes cpu unit reservations
+ of automaton insn or which is part of NDFA. */
+struct alt_state
+{
+ /* The following field is a deterministic state which characterizes
+ unit reservations of the instruction. */
+ state_t state;
+ /* The following field refers to the next state which characterizes
+ unit reservations of the instruction. */
+ alt_state_t next_alt_state;
+ /* The following field refers to the next state in sorted list. */
+ alt_state_t next_sorted_alt_state;
+};
+
+/* The following node type describes insn of automaton. They are
+ labels of FA arcs. */
+struct ainsn
+{
+ /* The following field value is the corresponding insn declaration
+ of description. */
+ struct insn_reserv_decl *insn_reserv_decl;
+ /* The following field value is the next insn declaration for an
+ automaton. */
+ ainsn_t next_ainsn;
+ /* The following field is states which characterize automaton unit
+ reservations of the instruction. The value can be NULL only if it
+ is special insn `cycle advancing'. */
+ alt_state_t alt_states;
+ /* The following field is sorted list of states which characterize
+ automaton unit reservations of the instruction. The value can be
+ NULL only if it is special insn `cycle advancing'. */
+ alt_state_t sorted_alt_states;
+ /* The following field refers to the next automaton insn with
+ the same reservations. */
+ ainsn_t next_same_reservs_insn;
+ /* The following field is flag of the first automaton insn with the
+ same reservations in the declaration list. Only arcs marked by
+ such an insn are present in the automaton. This significantly
+ decreases memory requirements especially when several automata
+ are formed. */
+ char first_insn_with_same_reservs;
+ /* The following member has nonzero value if there is arc from state of
+ the automaton marked by the ainsn. */
+ char arc_exists_p;
+ /* Cyclic list of insns of an equivalence class is formed with the
+ aid of the following field. */
+ ainsn_t next_equiv_class_insn;
+ /* The following field value is nonzero if the insn declaration is
+ the first insn declaration with given equivalence number. */
+ char first_ainsn_with_given_equialence_num;
+ /* The following field is number of class of equivalence of insns.
+ It is necessary because many insns may be equivalent from the
+ point of view of pipeline hazards. */
+ int insn_equiv_class_num;
+ /* The following member value is TRUE if there is an arc in the
+ automaton marked by the insn into another state. In other
+ words, the insn can change the state of the automaton. */
+ int important_p;
+};
+
+/* The following describes an automaton for PHR. */
+struct automaton
+{
+ /* The following field value is the list of insn declarations for
+ given automaton. */
+ ainsn_t ainsn_list;
+ /* The following field value is the corresponding automaton
+ declaration. This field is not NULL only if the automatic
+ partition on automata is not used. */
+ struct automaton_decl *corresponding_automaton_decl;
+ /* The following field value is the next automaton. */
+ automaton_t next_automaton;
+ /* The following field is start state of FA. There are no unit
+ reservations in the state. */
+ state_t start_state;
+ /* The following field value is number of equivalence classes of
+ insns (see field `insn_equiv_class_num' in
+ `ainsn'). */
+ int insn_equiv_classes_num;
+ /* The following field value is number of states of final DFA. */
+ int achieved_states_num;
+ /* The following field value is the order number (0, 1, ...) of
+ given automaton. */
+ int automaton_order_num;
+ /* The following fields contain statistics information about
+ building automaton. */
+ int NDFA_states_num, DFA_states_num;
+ /* The following field value is defined only if minimization of DFA
+ is used. */
+ int minimal_DFA_states_num;
+ int NDFA_arcs_num, DFA_arcs_num;
+ /* The following field value is defined only if minimization of DFA
+ is used. */
+ int minimal_DFA_arcs_num;
+ /* The following two members refer to two tables state x ainsn ->
+ int. */
+ state_ainsn_table_t trans_table;
+ state_ainsn_table_t state_alts_table;
+ /* The following member value is maximal value of min issue delay
+ for insns of the automaton. */
+ int max_min_delay;
+ /* Usually min issue delay is small and we can place several (2, 4,
+ 8) elements in one vector element. So the compression factor can
+ be 1 (no compression), 2, 4, 8. */
+ int min_issue_delay_table_compression_factor;
+};
+
+/* The following is the element of the list of automata. */
+struct automata_list_el
+{
+ /* The automaton itself. */
+ automaton_t automaton;
+ /* The next automata set element. */
+ automata_list_el_t next_automata_list_el;
+};
+
+/* The following structure describes a table state X ainsn -> int(>= 0). */
+struct state_ainsn_table
+{
+ /* Automaton to which given table belongs. */
+ automaton_t automaton;
+ /* The following three vectors are for comb vector implementation of
+ the table. */
+ vla_hwint_t comb_vect;
+ vla_hwint_t check_vect;
+ vla_hwint_t base_vect;
+ /* This is simple implementation of the table. */
+ vla_hwint_t full_vect;
+ /* Minimal and maximal values of the previous vectors. */
+ int min_comb_vect_el_value, max_comb_vect_el_value;
+ int min_base_vect_el_value, max_base_vect_el_value;
+};
+
+/* Create IR node: SIZE zero-filled bytes allocated on obstack `irp'. */
+static void *
+create_node (size)
+ size_t size;
+{
+ void *result;
+
+ obstack_blank (&irp, size);
+ result = obstack_base (&irp); /* Start of the object being built. */
+ obstack_finish (&irp); /* Close the object; RESULT stays valid. */
+ /* Default values of members are NULL and zero. */
+ memset (result, 0, size);
+ return result;
+}
+
+/* Return a new IR node holding a copy of the SIZE bytes at FROM. */
+static void *
+copy_node (from, size)
+ void *from;
+ size_t size;
+{
+ void *result;
+ result = create_node (size);
+ memcpy (result, from, size);
+ return result;
+}
+
+/* Report an error for each quote (`"') found in NAME; return NAME itself. */
+static char *
+check_name (name, pos)
+ char * name;
+ pos_t pos ATTRIBUTE_UNUSED;
+{
+ char *str;
+
+ for (str = name; *str != '\0'; str++)
+ if (*str == '\"')
+ error ("Name `%s' contains quotes", name);
+ return name;
+}
+
+/* Pointers to all declarations during IR generation are stored in the
+ following. */
+static vla_ptr_t decls;
+
+/* Given a pointer to a (char *) and a separator, return an
+ obstack-allocated copy of the next separated element with leading and
+ trailing whitespace removed, honoring parentheses if PAR_FLAG is
+ nonzero. Advance *PSTR past the element and its separator, or to the
+ end of the string. Return NULL if only whitespace remains. */
+static char *
+next_sep_el (pstr, sep, par_flag)
+ char **pstr;
+ int sep;
+ int par_flag;
+{
+ char *out_str;
+ char *p;
+ int pars_num;
+ int n_spaces;
+
+ /* Remove leading whitespaces (isspace needs an unsigned char value). */
+ while (isspace ((unsigned char) **pstr))
+ (*pstr)++;
+
+ if (**pstr == '\0')
+ return NULL;
+
+ n_spaces = 0; /* Whitespace seen but not yet copied; a trailing run is dropped. */
+ for (pars_num = 0, p = *pstr; *p != '\0'; p++)
+ {
+ if (par_flag && *p == '(')
+ pars_num++;
+ else if (par_flag && *p == ')')
+ pars_num--;
+ else if (pars_num == 0 && *p == sep)
+ break;
+ if (pars_num == 0 && isspace ((unsigned char) *p))
+ n_spaces++;
+ else
+ {
+ for (; n_spaces != 0; n_spaces--)
+ obstack_1grow (&irp, p [-n_spaces]);
+ obstack_1grow (&irp, *p);
+ }
+ }
+ obstack_1grow (&irp, '\0');
+ out_str = obstack_base (&irp);
+ obstack_finish (&irp);
+
+ *pstr = p;
+ if (**pstr == sep)
+ (*pstr)++;
+
+ return out_str;
+}
+
+/* Given a string and a separator, return the number of separated
+ elements in it, taking parentheses into account if PAR_FLAG has
+ nonzero value. Return 0 for the empty string, -1 if parentheses are
+ not balanced. */
+static int
+n_sep_els (s, sep, par_flag)
+ char *s;
+ int sep;
+ int par_flag;
+{
+ int n;
+ int pars_num;
+
+ if (*s == '\0')
+ return 0;
+
+ for (pars_num = 0, n = 1; *s; s++)
+ if (par_flag && *s == '(')
+ pars_num++;
+ else if (par_flag && *s == ')')
+ pars_num--;
+ else if (pars_num == 0 && *s == sep)
+ n++;
+
+ return (pars_num != 0 ? -1 : n);
+}
+
+/* Given a string and a separator, return a vector of the separated
+ elements of the string and store their number through ELS_NUM. Take
+ parentheses into account if PAR_FLAG has nonzero value. Return NULL
+ (ELS_NUM is 0 or -1) for the empty string or unbalanced parentheses. */
+static char **
+get_str_vect (str, els_num, sep, par_flag)
+ char *str;
+ int *els_num;
+ int sep;
+ int par_flag;
+{
+ int i;
+ char **vect;
+ char **pstr;
+
+ *els_num = n_sep_els (str, sep, par_flag);
+ if (*els_num <= 0)
+ return NULL;
+ obstack_blank (&irp, sizeof (char *) * (*els_num));
+ vect = (char **) obstack_base (&irp);
+ obstack_finish (&irp);
+ pstr = &str;
+ for (i = 0; i < *els_num; i++)
+ vect [i] = next_sep_el (pstr, sep, par_flag); /* NULL if text ends early, e.g. after a trailing SEP. */
+ if (next_sep_el (pstr, sep, par_flag) != NULL)
+ abort (); /* n_sep_els and next_sep_el disagree on the count. */
+ return vect;
+}
+
+/* Process a DEFINE_CPU_UNIT.
+
+ Operand 0 of DEF is a comma-separated list of unit names; for each we
+ add a unit_decl (query_p == 0) used later by `expand_automata'. */
+void
+gen_cpu_unit (def)
+ rtx def;
+{
+ decl_t decl;
+ char **str_cpu_units;
+ int vect_length;
+ int i;
+
+ str_cpu_units = get_str_vect ((char *) XSTR (def, 0), &vect_length, ',', 0);
+ if (str_cpu_units == NULL)
+ fatal ("invalid string `%s' in define_cpu_unit", XSTR (def, 0));
+ for (i = 0; i < vect_length; i++)
+ {
+ decl = create_node (sizeof (struct decl));
+ decl->mode = dm_unit;
+ decl->pos = 0;
+ decl->decl.unit.name = check_name (str_cpu_units [i], decl->pos);
+ decl->decl.unit.automaton_name = (char *) XSTR (def, 1);
+ decl->decl.unit.query_p = 0;
+ VLA_PTR_ADD (decls, decl);
+ num_dfa_decls++;
+ }
+}
+
+/* Process a DEFINE_QUERY_CPU_UNIT.
+
+ Operand 0 of DEF is a comma-separated list of unit names; for each we
+ add a unit_decl (query_p == 1) used later by `expand_automata'. */
+void
+gen_query_cpu_unit (def)
+ rtx def;
+{
+ decl_t decl;
+ char **str_cpu_units;
+ int vect_length;
+ int i;
+
+ str_cpu_units = get_str_vect ((char *) XSTR (def, 0), &vect_length, ',', 0);
+ if (str_cpu_units == NULL)
+ fatal ("invalid string `%s' in define_query_cpu_unit", XSTR (def, 0));
+ for (i = 0; i < vect_length; i++)
+ {
+ decl = create_node (sizeof (struct decl));
+ decl->mode = dm_unit;
+ decl->pos = 0;
+ decl->decl.unit.name = check_name (str_cpu_units [i], decl->pos);
+ decl->decl.unit.automaton_name = (char *) XSTR (def, 1);
+ decl->decl.unit.query_p = 1;
+ VLA_PTR_ADD (decls, decl);
+ num_dfa_decls++;
+ }
+}
+
+/* Process a DEFINE_BYPASS.
+
+ Operands 1 and 2 of DEF are comma-separated lists of output and input
+ insn names. For each (output, input) pair we fill in a struct
+ bypass_decl with information used later by `expand_automata'. */
+void
+gen_bypass (def)
+ rtx def;
+{
+ decl_t decl;
+ char **out_insns;
+ int out_length;
+ char **in_insns;
+ int in_length;
+ int i, j;
+
+ out_insns = get_str_vect ((char *) XSTR (def, 1), &out_length, ',', 0);
+ if (out_insns == NULL)
+ fatal ("invalid string `%s' in define_bypass", XSTR (def, 1));
+ in_insns = get_str_vect ((char *) XSTR (def, 2), &in_length, ',', 0);
+ if (in_insns == NULL)
+ fatal ("invalid string `%s' in define_bypass", XSTR (def, 2));
+ for (i = 0; i < out_length; i++)
+ for (j = 0; j < in_length; j++)
+ {
+ decl = create_node (sizeof (struct decl));
+ decl->mode = dm_bypass;
+ decl->pos = 0;
+ decl->decl.bypass.latency = XINT (def, 0);
+ decl->decl.bypass.out_insn_name = out_insns [i];
+ decl->decl.bypass.in_insn_name = in_insns [j];
+ decl->decl.bypass.bypass_guard_name = (char *) XSTR (def, 3);
+ VLA_PTR_ADD (decls, decl);
+ num_dfa_decls++;
+ }
+}
+
+/* Process an EXCLUSION_SET.
+
+ This gives information about cpu unit conflicts. We fill a struct
+ unit_rel_decl (excl) with the unit names of both operand lists (first
+ list first) for later use by `expand_automata'. */
+void
+gen_excl_set (def)
+ rtx def;
+{
+ decl_t decl;
+ char **first_str_cpu_units;
+ char **second_str_cpu_units;
+ int first_vect_length;
+ int length;
+ int i;
+
+ first_str_cpu_units
+ = get_str_vect ((char *) XSTR (def, 0), &first_vect_length, ',', 0);
+ if (first_str_cpu_units == NULL)
+ fatal ("invalid first string `%s' in exclusion_set", XSTR (def, 0));
+ second_str_cpu_units = get_str_vect ((char *) XSTR (def, 1), &length, ',',
+ 0);
+ if (second_str_cpu_units == NULL)
+ fatal ("invalid second string `%s' in exclusion_set", XSTR (def, 1));
+ length += first_vect_length; /* Number of names in both lists. */
+ decl = create_node (sizeof (struct decl) + (length - 1) * sizeof (char *));
+ decl->mode = dm_excl;
+ decl->pos = 0;
+ decl->decl.excl.names_num = length;
+ decl->decl.excl.first_list_length = first_vect_length;
+ for (i = 0; i < length; i++)
+ if (i < first_vect_length)
+ decl->decl.excl.names [i] = first_str_cpu_units [i];
+ else
+ decl->decl.excl.names [i] = second_str_cpu_units [i - first_vect_length];
+ VLA_PTR_ADD (decls, decl);
+ num_dfa_decls++;
+}
+
+/* Process a PRESENCE_SET.
+
+   Operands 0 and 1 of DEF are comma separated lists of cpu unit
+   names.  We fill a struct unit_rel_decl (presence) whose name
+   vector holds the names of the first list followed by the names of
+   the second list, and add the declaration to `decls' for later use
+   by `expand_automata'.  */
+void
+gen_presence_set (def)
+     rtx def;
+{
+  char **first_names;
+  char **second_names;
+  int first_num;
+  int total_num;
+  int ind;
+  decl_t presence;
+
+  first_names = get_str_vect ((char *) XSTR (def, 0), &first_num, ',', 0);
+  if (first_names == NULL)
+    fatal ("invalid first string `%s' in presence_set", XSTR (def, 0));
+  second_names = get_str_vect ((char *) XSTR (def, 1), &total_num, ',', 0);
+  if (second_names == NULL)
+    fatal ("invalid second string `%s' in presence_set", XSTR (def, 1));
+  /* From here on TOTAL_NUM counts the names of both lists.  */
+  total_num += first_num;
+  presence = create_node (sizeof (struct decl)
+                          + (total_num - 1) * sizeof (char *));
+  presence->mode = dm_presence;
+  presence->pos = 0;
+  presence->decl.presence.names_num = total_num;
+  presence->decl.presence.first_list_length = first_num;
+  for (ind = 0; ind < total_num; ind++)
+    presence->decl.presence.names [ind]
+      = (ind < first_num
+         ? first_names [ind] : second_names [ind - first_num]);
+  VLA_PTR_ADD (decls, presence);
+  num_dfa_decls++;
+}
+
+/* Process an ABSENCE_SET.
+
+   Operands 0 and 1 of DEF are comma separated lists of cpu unit
+   names.  We fill a struct unit_rel_decl (absence) whose name vector
+   holds the names of the first list followed by the names of the
+   second list, and add the declaration to `decls' for later use by
+   `expand_automata'.  */
+void
+gen_absence_set (def)
+     rtx def;
+{
+  char **first_names;
+  char **second_names;
+  int first_num;
+  int total_num;
+  int ind;
+  decl_t absence;
+
+  first_names = get_str_vect ((char *) XSTR (def, 0), &first_num, ',', 0);
+  if (first_names == NULL)
+    fatal ("invalid first string `%s' in absence_set", XSTR (def, 0));
+  second_names = get_str_vect ((char *) XSTR (def, 1), &total_num, ',', 0);
+  if (second_names == NULL)
+    fatal ("invalid second string `%s' in absence_set", XSTR (def, 1));
+  /* From here on TOTAL_NUM counts the names of both lists.  */
+  total_num += first_num;
+  absence = create_node (sizeof (struct decl)
+                         + (total_num - 1) * sizeof (char *));
+  absence->mode = dm_absence;
+  absence->pos = 0;
+  absence->decl.absence.names_num = total_num;
+  absence->decl.absence.first_list_length = first_num;
+  for (ind = 0; ind < total_num; ind++)
+    absence->decl.absence.names [ind]
+      = (ind < first_num
+         ? first_names [ind] : second_names [ind - first_num]);
+  VLA_PTR_ADD (decls, absence);
+  num_dfa_decls++;
+}
+
+/* Process a DEFINE_AUTOMATON.
+
+   Operand 0 of DEF is a comma separated list of automaton names.  We
+   create one struct automaton_decl for each name (after passing the
+   name through `check_name') and add it to `decls' for later use by
+   `expand_automata'.  */
+void
+gen_automaton (def)
+     rtx def;
+{
+  char **names;
+  int names_num;
+  int ind;
+  decl_t automaton;
+
+  names = get_str_vect ((char *) XSTR (def, 0), &names_num, ',', 0);
+  if (names == NULL)
+    fatal ("invalid string `%s' in define_automaton", XSTR (def, 0));
+  ind = 0;
+  while (ind < names_num)
+    {
+      automaton = create_node (sizeof (struct decl));
+      automaton->mode = dm_automaton;
+      automaton->pos = 0;
+      automaton->decl.automaton.name
+        = check_name (names [ind], automaton->pos);
+      VLA_PTR_ADD (decls, automaton);
+      num_dfa_decls++;
+      ind++;
+    }
+}
+
+/* Process an AUTOMATA_OPTION.
+
+   Operand 0 of DEF is an option name.  Depending on the name we set
+   `no_minimization_flag', `w_flag' or `ndfa_flag'; any other name is
+   a fatal error.  */
+void
+gen_automata_option (def)
+     rtx def;
+{
+  char *option = (char *) XSTR (def, 0);
+
+  /* NOTE(review): `+ 1' presumably skips the leading `-' of the
+     command line spelling of the option -- confirm against the macro
+     definitions.  */
+  if (strcmp (option, NO_MINIMIZATION_OPTION + 1) == 0)
+    {
+      no_minimization_flag = 1;
+      return;
+    }
+  if (strcmp (option, W_OPTION + 1) == 0)
+    {
+      w_flag = 1;
+      return;
+    }
+  if (strcmp (option, NDFA_OPTION + 1) == 0)
+    {
+      ndfa_flag = 1;
+      return;
+    }
+  fatal ("invalid option `%s' in automata_option", option);
+}
+
+/* Name in reservation to denote absence reservation. */
+#define NOTHING_NAME "nothing"
+
+/* The following string contains original reservation string being
+ parsed. */
+static char *reserv_str;
+
+/* Parse an element in STR.  An element is either a parenthesized
+   reservation (parsed recursively by `gen_regexp_sequence'), the
+   name `nothing' or a name which is taken as a unit name.  For a
+   parenthesized element the closing `)' in STR is overwritten by
+   '\0', i.e. STR is modified in place.  The function returns the
+   regexp node created for the element.  */
+static regexp_t
+gen_regexp_el (str)
+     char *str;
+{
+  regexp_t regexp;
+  int len;
+
+  if (*str == '(')
+    {
+      /* STR starts with `(', therefore LEN >= 1 here.  */
+      len = strlen (str);
+      if (str [len - 1] != ')')
+        fatal ("garbage after ) in reservation `%s'", reserv_str);
+      str [len - 1] = '\0';
+      regexp = gen_regexp_sequence (str + 1);
+    }
+  else if (strcmp (str, NOTHING_NAME) == 0)
+    {
+      /* The node is a regexp, so it must be sized as struct regexp
+         (not struct decl).  */
+      regexp = create_node (sizeof (struct regexp));
+      regexp->mode = rm_nothing;
+    }
+  else
+    {
+      regexp = create_node (sizeof (struct regexp));
+      regexp->mode = rm_unit;
+      regexp->regexp.unit.name = str;
+    }
+  return regexp;
+}
+
+/* Parse construction `repeat' in STR, i.e. an element optionally
+   followed by one or more `*NUMBER'.  Each repetition number must be
+   greater than one.  The function returns the regexp node created
+   for STR.  */
+static regexp_t
+gen_regexp_repeat (str)
+     char *str;
+{
+  regexp_t result;
+  regexp_t wrapper;
+  char **parts;
+  int parts_num;
+  int ind;
+
+  parts = get_str_vect (str, &parts_num, '*', 1);
+  if (parts == NULL)
+    fatal ("invalid `%s' in reservation `%s'", str, reserv_str);
+  /* There is no `*' at all: STR is just an element.  */
+  if (parts_num <= 1)
+    return gen_regexp_el (str);
+  result = gen_regexp_el (parts [0]);
+  /* Every following part wraps the regexp built so far into a new
+     repeat node.  */
+  for (ind = 1; ind < parts_num; ind++)
+    {
+      wrapper = create_node (sizeof (struct regexp));
+      wrapper->mode = rm_repeat;
+      wrapper->regexp.repeat.regexp = result;
+      wrapper->regexp.repeat.repeat_num = atoi (parts [ind]);
+      if (wrapper->regexp.repeat.repeat_num <= 1)
+        fatal ("repetition `%s' <= 1 in reservation `%s'",
+               str, reserv_str);
+      result = wrapper;
+    }
+  return result;
+}
+
+/* Parse reservation STR which possibly contains separator '+'.  If
+   there are several parts, an `allof' node containing the regexps of
+   all parts is returned, otherwise the regexp of STR itself.  */
+static regexp_t
+gen_regexp_allof (str)
+     char *str;
+{
+  regexp_t result;
+  char **parts;
+  int parts_num;
+  int ind;
+
+  parts = get_str_vect (str, &parts_num, '+', 1);
+  if (parts == NULL)
+    fatal ("invalid `%s' in reservation `%s'", str, reserv_str);
+  if (parts_num <= 1)
+    return gen_regexp_repeat (str);
+  result = create_node (sizeof (struct regexp)
+                        + sizeof (regexp_t) * (parts_num - 1));
+  result->mode = rm_allof;
+  result->regexp.allof.regexps_num = parts_num;
+  for (ind = 0; ind < parts_num; ind++)
+    result->regexp.allof.regexps [ind] = gen_regexp_repeat (parts [ind]);
+  return result;
+}
+
+/* Parse reservation STR which possibly contains separator '|'.  If
+   there are several parts, a `oneof' node containing the regexps of
+   all parts is returned, otherwise the regexp of STR itself.  */
+static regexp_t
+gen_regexp_oneof (str)
+     char *str;
+{
+  regexp_t result;
+  char **parts;
+  int parts_num;
+  int ind;
+
+  parts = get_str_vect (str, &parts_num, '|', 1);
+  if (parts == NULL)
+    fatal ("invalid `%s' in reservation `%s'", str, reserv_str);
+  if (parts_num <= 1)
+    return gen_regexp_allof (str);
+  result = create_node (sizeof (struct regexp)
+                        + sizeof (regexp_t) * (parts_num - 1));
+  result->mode = rm_oneof;
+  result->regexp.oneof.regexps_num = parts_num;
+  for (ind = 0; ind < parts_num; ind++)
+    result->regexp.oneof.regexps [ind] = gen_regexp_allof (parts [ind]);
+  return result;
+}
+
+/* Parse reservation STR which possibly contains separator ','.  If
+   there are several parts, a `sequence' node containing the regexps
+   of all parts is returned, otherwise the regexp of STR itself.  */
+static regexp_t
+gen_regexp_sequence (str)
+     char *str;
+{
+  regexp_t sequence;
+  char **sequence_vect;
+  int els_num;
+  int i;
+
+  sequence_vect = get_str_vect (str, &els_num, ',', 1);
+  /* Reject an unparsable string here, as the parsers of the other
+     separators do, instead of relying on the value left in ELS_NUM
+     by a failed call.  */
+  if (sequence_vect == NULL)
+    fatal ("invalid `%s' in reservation `%s'", str, reserv_str);
+  if (els_num > 1)
+    {
+      sequence = create_node (sizeof (struct regexp)
+                              + sizeof (regexp_t) * (els_num - 1));
+      sequence->mode = rm_sequence;
+      sequence->regexp.sequence.regexps_num = els_num;
+      for (i = 0; i < els_num; i++)
+        sequence->regexp.sequence.regexps [i]
+          = gen_regexp_oneof (sequence_vect [i]);
+      return sequence;
+    }
+  else
+    return gen_regexp_oneof (str);
+}
+
+/* Parse construction reservation STR.  STR is remembered in
+   `reserv_str' in order to be reported in error messages of the
+   parsing functions.  The function returns the regexp node created
+   for the whole reservation.  */
+static regexp_t
+gen_regexp (str)
+     char *str;
+{
+  reserv_str = str;
+  return gen_regexp_sequence (str);
+}
+
+/* Process a DEFINE_RESERVATION.
+
+   Operand 0 of DEF is the reservation name (it is passed through
+   `check_name'), operand 1 is the reservation string which is parsed
+   into a regexp.  We fill in a struct reserv_decl and add it to
+   `decls' for later use by `expand_automata'.  */
+void
+gen_reserv (def)
+     rtx def;
+{
+  decl_t reserv = create_node (sizeof (struct decl));
+
+  reserv->mode = dm_reserv;
+  reserv->pos = 0;
+  reserv->decl.reserv.name
+    = check_name ((char *) XSTR (def, 0), reserv->pos);
+  reserv->decl.reserv.regexp = gen_regexp ((char *) XSTR (def, 1));
+  VLA_PTR_ADD (decls, reserv);
+  num_dfa_decls++;
+}
+
+/* Process a DEFINE_INSN_RESERVATION.
+
+   Operand 0 of DEF is the insn reservation name (it is passed
+   through `check_name'), operand 1 is the default latency, operand 2
+   is the condition expression and operand 3 is the reservation
+   string which is parsed into a regexp.  We fill a struct
+   insn_reserv_decl and add it to `decls' for later use by
+   `expand_automata'.  */
+void
+gen_insn_reserv (def)
+     rtx def;
+{
+  decl_t insn_reserv = create_node (sizeof (struct decl));
+
+  insn_reserv->mode = dm_insn_reserv;
+  insn_reserv->pos = 0;
+  insn_reserv->decl.insn_reserv.name
+    = check_name ((char *) XSTR (def, 0), insn_reserv->pos);
+  insn_reserv->decl.insn_reserv.default_latency = XINT (def, 1);
+  insn_reserv->decl.insn_reserv.condexp = XEXP (def, 2);
+  insn_reserv->decl.insn_reserv.regexp
+    = gen_regexp ((char *) XSTR (def, 3));
+  VLA_PTR_ADD (decls, insn_reserv);
+  num_dfa_decls++;
+}
+
+\f
+
+/* The function evaluates hash value (0..UINT_MAX) of STRING.  Each
+   character of the string (the first one included, the terminating
+   '\0' excluded) is shifted left by its index modulo CHAR_BIT and
+   added to the result.  The hash value of an empty string is 0.  */
+static unsigned
+string_hash (string)
+     const char *string;
+{
+  unsigned result, i;
+
+  /* The pointer is advanced only after the character has been used;
+     advancing it in the loop condition would skip the first
+     character of the string.  */
+  for (result = i = 0; *string != '\0'; string++, i++)
+    result += ((unsigned char) *string << (i % CHAR_BIT));
+  return result;
+}
+
+\f
+
+/* This page contains abstract data `table of automaton declarations'.
+ Elements of the table are nodes representing automaton declarations.
+ The key of a table element is the name of the given automaton.
+ Remember that automaton names have their own name space. */
+
+/* The function evaluates hash value of an automaton declaration.
+   The function is used by abstract data `hashtab'.  The function
+   returns hash value (0..UINT_MAX) of given automaton declaration,
+   i.e. the hash value of the automaton name.  The function aborts if
+   the node is not an automaton declaration or has no name (the same
+   check is made by `automaton_decl_eq_p').  */
+static unsigned
+automaton_decl_hash (automaton_decl)
+     const void *automaton_decl;
+{
+  const decl_t decl = (decl_t) automaton_decl;
+
+  /* Reading `decl.automaton.name' is meaningful only for a node of
+     mode dm_automaton, therefore nodes of any other mode are
+     rejected too.  */
+  if (decl->mode != dm_automaton || decl->decl.automaton.name == NULL)
+    abort ();
+  return string_hash (decl->decl.automaton.name);
+}
+
+/* The function tests automaton declarations on equality of their
+   keys (automaton names).  The function is used by abstract data
+   `hashtab'.  The function returns 1 if the declarations have the
+   same key, 0 otherwise.  The function aborts if one of the nodes is
+   not an automaton declaration or has no name.  */
+static int
+automaton_decl_eq_p (automaton_decl_1, automaton_decl_2)
+     const void* automaton_decl_1;
+     const void* automaton_decl_2;
+{
+  const decl_t first = (decl_t) automaton_decl_1;
+  const decl_t second = (decl_t) automaton_decl_2;
+
+  if (!(first->mode == dm_automaton
+        && first->decl.automaton.name != NULL
+        && second->mode == dm_automaton
+        && second->decl.automaton.name != NULL))
+    abort ();
+  return !strcmp (first->decl.automaton.name,
+                  second->decl.automaton.name);
+}
+
+/* The automaton declaration table itself is represented by the
+ following variable. */
+static htab_t automaton_decl_table;
+
+/* The function inserts AUTOMATON_DECL into the automaton declaration
+   table unless the table already contains a declaration with the
+   same key.  In any case the function returns the node which is in
+   the table under this key after the call.  */
+static decl_t
+insert_automaton_decl (automaton_decl)
+     decl_t automaton_decl;
+{
+  void **slot = htab_find_slot (automaton_decl_table, automaton_decl, 1);
+
+  /* An empty slot means that there was no such key in the table.  */
+  if (*slot == NULL)
+    *slot = (void *) automaton_decl;
+  return (decl_t) *slot;
+}
+
+/* The following variable value is node representing automaton
+ declaration. The node used for searching automaton declaration
+ with given name. */
+static struct decl work_automaton_decl;
+
+/* The function searches the automaton declaration table for a
+   declaration with given NAME.  The search key is formed in
+   `work_automaton_decl'.  The function returns the node found in the
+   table, NULL if such node does not exist in the table.  */
+static decl_t
+find_automaton_decl (name)
+     char *name;
+{
+  work_automaton_decl.decl.automaton.name = name;
+  return (decl_t) htab_find (automaton_decl_table, &work_automaton_decl);
+}
+
+/* The function creates an empty automaton declaration table and
+   prepares `work_automaton_decl', the node used for searching an
+   automaton declaration with given name.  The function must be
+   called only once before any work with the automaton declaration
+   table.  */
+static void
+initiate_automaton_decl_table ()
+{
+  automaton_decl_table = htab_create (10, automaton_decl_hash,
+                                      automaton_decl_eq_p, (htab_del) 0);
+  /* The search node must look like an automaton declaration for the
+     hash and equality functions.  */
+  work_automaton_decl.mode = dm_automaton;
+}
+
+/* The function deletes the automaton declaration table.  After the
+   call the table may be used again only after a call of function
+   `initiate_automaton_decl_table'.  */
+static void
+finish_automaton_decl_table ()
+{
+  htab_delete (automaton_decl_table);
+}
+
+\f
+
+/* This page contains abstract data `table of insn declarations'.
+ Elements of the table are nodes representing insn declarations. The
+ key of a table element is the name of the given insn (in the
+ corresponding define_insn_reservation). Remember that insn names
+ have their own name space. */
+
+/* The function evaluates hash value of an insn declaration.  The
+   function is used by abstract data `hashtab'.  The function returns
+   hash value (0..UINT_MAX) of given insn declaration, i.e. the hash
+   value of the insn reservation name.  The function aborts if the
+   node is not an insn reservation declaration or has no name.  */
+static unsigned
+insn_decl_hash (insn_decl)
+     const void *insn_decl;
+{
+  const decl_t node = (decl_t) insn_decl;
+
+  if (!(node->mode == dm_insn_reserv
+        && node->decl.insn_reserv.name != NULL))
+    abort ();
+  return string_hash (node->decl.insn_reserv.name);
+}
+
+/* The function tests insn declarations on equality of their keys
+   (insn reservation names).  The function is used by abstract data
+   `hashtab'.  The function returns 1 if declarations have the same
+   key, 0 otherwise.  The function aborts if one of the nodes is not
+   an insn reservation declaration or has no name.  */
+static int
+insn_decl_eq_p (insn_decl_1, insn_decl_2)
+     const void *insn_decl_1;
+     const void *insn_decl_2;
+{
+  const decl_t first = (decl_t) insn_decl_1;
+  const decl_t second = (decl_t) insn_decl_2;
+
+  if (!(first->mode == dm_insn_reserv
+        && first->decl.insn_reserv.name != NULL
+        && second->mode == dm_insn_reserv
+        && second->decl.insn_reserv.name != NULL))
+    abort ();
+  return !strcmp (first->decl.insn_reserv.name,
+                  second->decl.insn_reserv.name);
+}
+
+/* The insn declaration table itself is represented by the following
+ variable. The table contains only insn reservation declarations
+ (nodes of mode dm_insn_reserv). */
+static htab_t insn_decl_table;
+
+/* The function inserts INSN_DECL into the insn declaration table
+   unless the table already contains a declaration with the same
+   key.  In any case the function returns the node which is in the
+   table under this key after the call.  */
+static decl_t
+insert_insn_decl (insn_decl)
+     decl_t insn_decl;
+{
+  void **slot = htab_find_slot (insn_decl_table, insn_decl, 1);
+
+  /* An empty slot means that there was no such key in the table.  */
+  if (*slot == NULL)
+    *slot = (void *) insn_decl;
+  return (decl_t) *slot;
+}
+
+/* The following variable value is node representing insn reservation
+ declaration. The node used for searching insn reservation
+ declaration with given name. */
+static struct decl work_insn_decl;
+
+/* The function searches the insn declaration table for an insn
+   reservation declaration with given NAME.  The search key is formed
+   in `work_insn_decl'.  The function returns the node found in the
+   table, NULL if such node does not exist in the table.  */
+static decl_t
+find_insn_decl (name)
+     char *name;
+{
+  work_insn_decl.decl.insn_reserv.name = name;
+  return (decl_t) htab_find (insn_decl_table, &work_insn_decl);
+}
+
+/* The function creates an empty insn declaration table and prepares
+   `work_insn_decl', the node used for searching an insn declaration
+   with given name.  The function must be called only once before
+   any work with the insn declaration table.  */
+static void
+initiate_insn_decl_table ()
+{
+  insn_decl_table = htab_create (10, insn_decl_hash, insn_decl_eq_p,
+                                 (htab_del) 0);
+  /* The search node must look like an insn reservation declaration
+     for the hash and equality functions.  */
+  work_insn_decl.mode = dm_insn_reserv;
+}
+
+/* The function deletes the insn declaration table.  After the call
+   the table may be used again only after a call of function
+   `initiate_insn_decl_table'.  */
+static void
+finish_insn_decl_table ()
+{
+  htab_delete (insn_decl_table);
+}
+
+\f
+
+/* This page contains abstract data `table of declarations'. Elements
+ of the table are nodes representing declarations (of units and
+ reservations). The key of a table element is the name of the given
+ declaration. */
+
+/* The function evaluates hash value of a declaration (of a unit or
+   of a reservation).  The function is used by abstract data
+   `hashtab'.  The function returns hash value (0..UINT_MAX) of given
+   declaration, i.e. the hash value of the unit or reservation name.
+   The function aborts if the node is neither a named unit nor a
+   named reservation.  */
+static unsigned
+decl_hash (decl)
+     const void *decl;
+{
+  const decl_t node = (const decl_t) decl;
+  const char *key = NULL;
+
+  if (node->mode == dm_unit && node->decl.unit.name != NULL)
+    key = node->decl.unit.name;
+  else if (node->mode == dm_reserv && node->decl.reserv.name != NULL)
+    key = node->decl.reserv.name;
+  else
+    abort ();
+  return string_hash (key);
+}
+
+/* The function tests declarations (of units and reservations) on
+   equality of their keys, i.e. of their names.  The function is used
+   by abstract data `hashtab'.  The function returns 1 if the
+   declarations have the same key, 0 otherwise.  The function aborts
+   if one of the nodes is neither a named unit nor a named
+   reservation.  */
+static int
+decl_eq_p (decl_1, decl_2)
+     const void *decl_1;
+     const void *decl_2;
+{
+  const decl_t first = (const decl_t) decl_1;
+  const decl_t second = (const decl_t) decl_2;
+  const char *first_key = NULL;
+  const char *second_key = NULL;
+
+  if (first->mode == dm_unit && first->decl.unit.name != NULL)
+    first_key = first->decl.unit.name;
+  else if (first->mode == dm_reserv && first->decl.reserv.name != NULL)
+    first_key = first->decl.reserv.name;
+  else
+    abort ();
+  if (second->mode == dm_unit && second->decl.unit.name != NULL)
+    second_key = second->decl.unit.name;
+  else if (second->mode == dm_reserv && second->decl.reserv.name != NULL)
+    second_key = second->decl.reserv.name;
+  else
+    abort ();
+  return strcmp (first_key, second_key) == 0;
+}
+
+/* The declaration table itself is represented by the following
+ variable. */
+static htab_t decl_table;
+
+/* The function inserts DECL into the declaration table unless the
+   table already contains a declaration with the same key.  In any
+   case the function returns the node which is in the table under
+   this key after the call.  */
+
+static decl_t
+insert_decl (decl)
+     decl_t decl;
+{
+  void **slot = htab_find_slot (decl_table, decl, 1);
+
+  /* An empty slot means that there was no such key in the table.  */
+  if (*slot == NULL)
+    *slot = (void *) decl;
+  return (decl_t) *slot;
+}
+
+/* The following variable value is node representing declaration. The
+ node used for searching declaration with given name. */
+static struct decl work_decl;
+
+/* The function searches the declaration table for a declaration (of
+   a unit or of a reservation) with given NAME.  The search key is
+   formed in `work_decl' as a unit name.  The function returns the
+   node found in the table, NULL if such node does not exist in the
+   table.  */
+static decl_t
+find_decl (name)
+     char *name;
+{
+  work_decl.decl.unit.name = name;
+  return (decl_t) htab_find (decl_table, &work_decl);
+}
+
+/* The function creates an empty declaration table and prepares
+   `work_decl', the node used for searching a declaration with given
+   name.  The function must be called only once before any work with
+   the declaration table.  */
+static void
+initiate_decl_table ()
+{
+  decl_table = htab_create (10, decl_hash, decl_eq_p, (htab_del) 0);
+  /* The search node is made a unit declaration; the hash and
+     equality functions accept units as well as reservations.  */
+  work_decl.mode = dm_unit;
+}
+
+/* The function deletes the declaration table.  After the call the
+   table may be used again only after a call of function
+   `initiate_decl_table'.  */
+static void
+finish_decl_table ()
+{
+  htab_delete (decl_table);
+}
+
+\f
+
+/* This page contains checker of pipeline hazard description. */
+
+/* Checking NAMES (a vector of NUM names) in an exclusion clause and
+   returning formed unit_set_el_list.  A name which is not declared
+   or is not declared as a unit is reported as an error and is not
+   included in the list.  The order of the list elements is the order
+   of the names.  */
+static unit_set_el_t
+process_excls (names, num, excl_pos)
+     char **names;
+     int num;
+     pos_t excl_pos ATTRIBUTE_UNUSED;
+{
+  unit_set_el_t head = NULL;
+  /* Place where the next created element must be linked in.  */
+  unit_set_el_t *tail_link = &head;
+  unit_set_el_t el;
+  decl_t found;
+  int ind;
+
+  for (ind = 0; ind < num; ind++)
+    {
+      found = find_decl (names [ind]);
+      if (found == NULL)
+        {
+          error ("unit `%s' in exclusion is not declared", names [ind]);
+          continue;
+        }
+      if (found->mode != dm_unit)
+        {
+          error ("`%s' in exclusion is not unit", names [ind]);
+          continue;
+        }
+      el = create_node (sizeof (struct unit_set_el));
+      el->unit_decl = &found->decl.unit;
+      el->next_unit_set_el = NULL;
+      *tail_link = el;
+      tail_link = &el->next_unit_set_el;
+    }
+  return head;
+}
+
+/* The function adds a copy of each element from SOURCE_LIST to the
+   end of the exclusion list of the unit of each element from
+   DEST_LIST, unless the exclusion list already contains the unit.
+   Situations "unit excludes itself" and "units belong to different
+   automata" are reported as errors and such pair of units is
+   skipped.  */
+static void
+add_excls (dest_list, source_list, excl_pos)
+     unit_set_el_t dest_list;
+     unit_set_el_t source_list;
+     pos_t excl_pos ATTRIBUTE_UNUSED;
+{
+  unit_set_el_t dst;
+  unit_set_el_t src;
+  unit_set_el_t copy;
+  unit_set_el_t *link;
+
+  for (dst = dest_list; dst != NULL; dst = dst->next_unit_set_el)
+    for (src = source_list; src != NULL; src = src->next_unit_set_el)
+      {
+        if (dst->unit_decl == src->unit_decl)
+          {
+            error ("unit `%s' excludes itself", src->unit_decl->name);
+            continue;
+          }
+        if (dst->unit_decl->automaton_name != NULL
+            && src->unit_decl->automaton_name != NULL
+            && strcmp (dst->unit_decl->automaton_name,
+                       src->unit_decl->automaton_name) != 0)
+          {
+            error ("units `%s' and `%s' in exclusion set belong to different automata",
+                   src->unit_decl->name, dst->unit_decl->name);
+            continue;
+          }
+        /* Search for the source unit in the exclusion list; LINK
+           stops at the element found or at the terminating NULL link
+           of the list.  */
+        link = &dst->unit_decl->excl_list;
+        while (*link != NULL && (*link)->unit_decl != src->unit_decl)
+          link = &(*link)->next_unit_set_el;
+        if (*link != NULL)
+          continue;
+        /* Element not found - append its copy.  */
+        copy = copy_node (src, sizeof (*src));
+        copy->next_unit_set_el = NULL;
+        *link = copy;
+      }
+}
+
+/* Checking NAMES (a vector of NUM names) in a presence clause (if
+   PRESENCE_P) or in an absence clause and returning formed
+   unit_set_el_list.  A name which is not declared or is not declared
+   as a unit is reported as an error and is not included in the
+   list.  The function is called only after processing all exclusion
+   sets.  */
+static unit_set_el_t
+process_presence_absence (names, num, req_pos, presence_p)
+     char **names;
+     int num;
+     pos_t req_pos ATTRIBUTE_UNUSED;
+     int presence_p;
+{
+  unit_set_el_t head = NULL;
+  /* Place where the next created element must be linked in.  */
+  unit_set_el_t *tail_link = &head;
+  unit_set_el_t el;
+  decl_t found;
+  int ind;
+
+  for (ind = 0; ind < num; ind++)
+    {
+      found = find_decl (names [ind]);
+      if (found == NULL)
+        {
+          error ((presence_p
+                  ? "unit `%s' in presence set is not declared"
+                  : "unit `%s' in absence set is not declared"),
+                 names [ind]);
+          continue;
+        }
+      if (found->mode != dm_unit)
+        {
+          error ((presence_p
+                  ? "`%s' in presence set is not unit"
+                  : "`%s' in absence set is not unit"), names [ind]);
+          continue;
+        }
+      el = create_node (sizeof (struct unit_set_el));
+      el->unit_decl = &found->decl.unit;
+      el->next_unit_set_el = NULL;
+      *tail_link = el;
+      tail_link = &el->next_unit_set_el;
+    }
+  return head;
+}
+
+/* The function adds a copy of each element from SOURCE_LIST to the
+   end of the presence (if PRESENCE_P) or absence list of the unit of
+   each element from DEST_LIST, unless the list already contains the
+   unit.  Situations "unit requires own presence", "unit requires own
+   absence" and "units belong to different automata" are reported as
+   errors.  A required presence of an excluded unit and a required
+   absence of a unit whose presence is required are reported as
+   errors too (and the unit is not added), or only as warnings (and
+   the unit is added) when `w_flag' is set.  Remember that we process
+   absence sets only after all presence sets.  */
+static void
+add_presence_absence (dest_list, source_list, req_pos, presence_p)
+     unit_set_el_t dest_list;
+     unit_set_el_t source_list;
+     pos_t req_pos ATTRIBUTE_UNUSED;
+     int presence_p;
+{
+  unit_set_el_t dst;
+  unit_set_el_t src;
+  unit_set_el_t el;
+  unit_set_el_t copy;
+  unit_set_el_t *link;
+  int insert_p;
+
+  for (dst = dest_list; dst != NULL; dst = dst->next_unit_set_el)
+    for (src = source_list; src != NULL; src = src->next_unit_set_el)
+      {
+        if (dst->unit_decl == src->unit_decl)
+          {
+            error ((presence_p
+                    ? "unit `%s' requires own presence"
+                    : "unit `%s' requires own absence"),
+                   src->unit_decl->name);
+            continue;
+          }
+        if (dst->unit_decl->automaton_name != NULL
+            && src->unit_decl->automaton_name != NULL
+            && strcmp (dst->unit_decl->automaton_name,
+                       src->unit_decl->automaton_name) != 0)
+          {
+            error ((presence_p
+                    ? "units `%s' and `%s' in presence set belong to different automata"
+                    : "units `%s' and `%s' in absence set belong to different automata"),
+                   src->unit_decl->name, dst->unit_decl->name);
+            continue;
+          }
+        /* Search for the source unit in the list being formed; LINK
+           stops at the element found or at the terminating NULL link
+           of the list.  */
+        link = (presence_p
+                ? &dst->unit_decl->presence_list
+                : &dst->unit_decl->absence_list);
+        while (*link != NULL && (*link)->unit_decl != src->unit_decl)
+          link = &(*link)->next_unit_set_el;
+        if (*link != NULL)
+          continue;
+        /* Element not found - insert if there is no error.  A
+           presence requirement conflicts with the exclusion list, an
+           absence requirement conflicts with the presence list.  */
+        insert_p = 1;
+        for (el = (presence_p
+                   ? dst->unit_decl->excl_list
+                   : dst->unit_decl->presence_list);
+             el != NULL;
+             el = el->next_unit_set_el)
+          {
+            if (src->unit_decl != el->unit_decl)
+              continue;
+            if (w_flag)
+              warning
+                ((presence_p
+                  ? "unit `%s' excludes and requires presence of `%s'"
+                  : "unit `%s' requires absence and presence of `%s'"),
+                 dst->unit_decl->name, src->unit_decl->name);
+            else
+              {
+                error
+                  ((presence_p
+                    ? "unit `%s' excludes and requires presence of `%s'"
+                    : "unit `%s' requires absence and presence of `%s'"),
+                   dst->unit_decl->name, src->unit_decl->name);
+                insert_p = 0;
+              }
+          }
+        if (insert_p)
+          {
+            copy = copy_node (src, sizeof (*src));
+            copy->next_unit_set_el = NULL;
+            *link = copy;
+          }
+      }
+}
+
+/* The function searches for bypass with given IN_INSN_RESERV in
+   given BYPASS_LIST (the bypasses are linked through member `next').
+   The function returns the first such bypass, NULL if there is no
+   such bypass in the list.  */
+static struct bypass_decl *
+find_bypass (bypass_list, in_insn_reserv)
+     struct bypass_decl *bypass_list;
+     struct insn_reserv_decl *in_insn_reserv;
+{
+  struct bypass_decl *curr = bypass_list;
+
+  while (curr != NULL && curr->in_insn_reserv != in_insn_reserv)
+    curr = curr->next;
+  return curr;
+}
+
+/* The function processes pipeline description declarations, checks
+   their correctness, and forms exclusion/presence/absence sets.  The
+   declarations in `description->decls' are processed in several
+   passes:
+
+     o automaton declarations are inserted into the automaton table;
+     o insn reservations are numbered and inserted into the insn
+       table, units are numbered and bound to their automata, units
+       and reservations are inserted into the declaration table;
+     o bypasses are bound to their insn reservations and linked into
+       the bypass list of the output insn reservation;
+     o exclusion sets are formed;
+     o presence sets are formed (their check uses exclusion sets);
+     o absence sets are formed (their check uses presence sets).  */
+static void
+process_decls ()
+{
+  decl_t decl;
+  decl_t automaton_decl;
+  decl_t decl_in_table;
+  decl_t out_insn_reserv;
+  decl_t in_insn_reserv;
+  struct bypass_decl *bypass;
+  int automaton_presence;
+  int i;
+
+  /* Checking repeated automata declarations.  */
+  automaton_presence = 0;
+  for (i = 0; i < description->decls_num; i++)
+    {
+      decl = description->decls [i];
+      if (decl->mode == dm_automaton)
+        {
+          automaton_presence = 1;
+          decl_in_table = insert_automaton_decl (decl);
+          if (decl_in_table != decl)
+            {
+              if (!w_flag)
+                error ("repeated declaration of automaton `%s'",
+                       decl->decl.automaton.name);
+              else
+                warning ("repeated declaration of automaton `%s'",
+                         decl->decl.automaton.name);
+            }
+        }
+    }
+  /* Checking undeclared automata, repeated declarations (except for
+     automata) and correctness of their attributes (insn latency times
+     etc.).  */
+  for (i = 0; i < description->decls_num; i++)
+    {
+      decl = description->decls [i];
+      if (decl->mode == dm_insn_reserv)
+        {
+          decl->decl.insn_reserv.condexp
+            = check_attr_test (decl->decl.insn_reserv.condexp, 0, 0);
+          if (decl->decl.insn_reserv.default_latency < 0)
+            error ("define_insn_reservation `%s' has negative latency time",
+                   decl->decl.insn_reserv.name);
+          decl->decl.insn_reserv.insn_num = description->insns_num;
+          description->insns_num++;
+          decl_in_table = insert_insn_decl (decl);
+          if (decl_in_table != decl)
+            error ("`%s' is already used as insn reservation name",
+                   decl->decl.insn_reserv.name);
+        }
+      else if (decl->mode == dm_bypass)
+        {
+          if (decl->decl.bypass.latency < 0)
+            error ("define_bypass `%s - %s' has negative latency time",
+                   decl->decl.bypass.out_insn_name,
+                   decl->decl.bypass.in_insn_name);
+        }
+      else if (decl->mode == dm_unit || decl->mode == dm_reserv)
+        {
+          if (decl->mode == dm_unit)
+            {
+              decl->decl.unit.automaton_decl = NULL;
+              if (decl->decl.unit.automaton_name != NULL)
+                {
+                  automaton_decl
+                    = find_automaton_decl (decl->decl.unit.automaton_name);
+                  if (automaton_decl == NULL)
+                    error ("automaton `%s' is not declared",
+                           decl->decl.unit.automaton_name);
+                  else
+                    {
+                      automaton_decl->decl.automaton.automaton_is_used = 1;
+                      decl->decl.unit.automaton_decl
+                        = &automaton_decl->decl.automaton;
+                    }
+                }
+              else if (automaton_presence)
+                error ("define_unit `%s' without automaton when one defined",
+                       decl->decl.unit.name);
+              /* The unit gets its number even if its name is wrong.  */
+              decl->decl.unit.unit_num = description->units_num;
+              description->units_num++;
+              if (strcmp (decl->decl.unit.name, NOTHING_NAME) == 0)
+                {
+                  error ("`%s' is declared as cpu unit", NOTHING_NAME);
+                  continue;
+                }
+              decl_in_table = find_decl (decl->decl.unit.name);
+            }
+          else
+            {
+              if (strcmp (decl->decl.reserv.name, NOTHING_NAME) == 0)
+                {
+                  error ("`%s' is declared as cpu reservation", NOTHING_NAME);
+                  continue;
+                }
+              decl_in_table = find_decl (decl->decl.reserv.name);
+            }
+          /* Units and reservations share one name space.  */
+          if (decl_in_table == NULL)
+            decl_in_table = insert_decl (decl);
+          else
+            {
+              if (decl->mode == dm_unit)
+                error ("repeated declaration of unit `%s'",
+                       decl->decl.unit.name);
+              else
+                error ("repeated declaration of reservation `%s'",
+                       decl->decl.reserv.name);
+            }
+        }
+    }
+  /* Check bypasses and form list of bypasses for each (output)
+     insn.  */
+  for (i = 0; i < description->decls_num; i++)
+    {
+      decl = description->decls [i];
+      if (decl->mode == dm_bypass)
+        {
+          out_insn_reserv = find_insn_decl (decl->decl.bypass.out_insn_name);
+          in_insn_reserv = find_insn_decl (decl->decl.bypass.in_insn_name);
+          if (out_insn_reserv == NULL)
+            error ("there is no insn reservation `%s'",
+                   decl->decl.bypass.out_insn_name);
+          else if (in_insn_reserv == NULL)
+            error ("there is no insn reservation `%s'",
+                   decl->decl.bypass.in_insn_name);
+          else
+            {
+              decl->decl.bypass.out_insn_reserv
+                = &out_insn_reserv->decl.insn_reserv;
+              decl->decl.bypass.in_insn_reserv
+                = &in_insn_reserv->decl.insn_reserv;
+              bypass
+                = find_bypass (out_insn_reserv->decl.insn_reserv.bypass_list,
+                               decl->decl.bypass.in_insn_reserv);
+              if (bypass != NULL)
+                {
+                  /* A repeated bypass with the same latency may be
+                     only a warning, different latencies are always
+                     an error.  */
+                  if (decl->decl.bypass.latency == bypass->latency)
+                    {
+                      if (!w_flag)
+                        error
+                          ("the same bypass `%s - %s' is already defined",
+                           decl->decl.bypass.out_insn_name,
+                           decl->decl.bypass.in_insn_name);
+                      else
+                        warning
+                          ("the same bypass `%s - %s' is already defined",
+                           decl->decl.bypass.out_insn_name,
+                           decl->decl.bypass.in_insn_name);
+                    }
+                  else
+                    error ("bypass `%s - %s' is already defined",
+                           decl->decl.bypass.out_insn_name,
+                           decl->decl.bypass.in_insn_name);
+                }
+              else
+                {
+                  /* Put the new bypass at the head of the list.  */
+                  decl->decl.bypass.next
+                    = out_insn_reserv->decl.insn_reserv.bypass_list;
+                  out_insn_reserv->decl.insn_reserv.bypass_list
+                    = &decl->decl.bypass;
+                }
+            }
+        }
+    }
+
+  /* Check exclusion set declarations and form exclusion sets.  */
+  for (i = 0; i < description->decls_num; i++)
+    {
+      decl = description->decls [i];
+      if (decl->mode == dm_excl)
+        {
+          unit_set_el_t unit_set_el_list;
+          unit_set_el_t unit_set_el_list_2;
+
+          unit_set_el_list
+            = process_excls (decl->decl.excl.names,
+                             decl->decl.excl.first_list_length, decl->pos);
+          unit_set_el_list_2
+            = process_excls (&decl->decl.excl.names
+                             [decl->decl.excl.first_list_length],
+                             decl->decl.excl.names_num
+                             - decl->decl.excl.first_list_length,
+                             decl->pos);
+          /* The exclusion is recorded in both directions.  */
+          add_excls (unit_set_el_list, unit_set_el_list_2, decl->pos);
+          add_excls (unit_set_el_list_2, unit_set_el_list, decl->pos);
+        }
+    }
+
+  /* Check presence set declarations and form presence sets.  */
+  for (i = 0; i < description->decls_num; i++)
+    {
+      decl = description->decls [i];
+      if (decl->mode == dm_presence)
+        {
+          unit_set_el_t unit_set_el_list;
+          unit_set_el_t unit_set_el_list_2;
+
+          unit_set_el_list
+            = process_presence_absence
+              (decl->decl.presence.names,
+               decl->decl.presence.first_list_length, decl->pos, 1);
+          unit_set_el_list_2
+            = process_presence_absence
+              (&decl->decl.presence.names
+               [decl->decl.presence.first_list_length],
+               decl->decl.presence.names_num
+               - decl->decl.presence.first_list_length,
+               decl->pos, 1);
+          add_presence_absence (unit_set_el_list, unit_set_el_list_2,
+                                decl->pos, 1);
+        }
+    }
+
+  /* Check absence set declarations and form absence sets.  The
+     declaration is read through its own member `absence' of the
+     union of declarations.  */
+  for (i = 0; i < description->decls_num; i++)
+    {
+      decl = description->decls [i];
+      if (decl->mode == dm_absence)
+        {
+          unit_set_el_t unit_set_el_list;
+          unit_set_el_t unit_set_el_list_2;
+
+          unit_set_el_list
+            = process_presence_absence
+              (decl->decl.absence.names,
+               decl->decl.absence.first_list_length, decl->pos, 0);
+          unit_set_el_list_2
+            = process_presence_absence
+              (&decl->decl.absence.names
+               [decl->decl.absence.first_list_length],
+               decl->decl.absence.names_num
+               - decl->decl.absence.first_list_length,
+               decl->pos, 0);
+          add_presence_absence (unit_set_el_list, unit_set_el_list_2,
+                                decl->pos, 0);
+        }
+    }
+}
+
+/* The following function checks that each declared automaton is
+ used. If an automaton is not used, the function reports an error
+ (a warning if w_flag is set). Call it only after `process_decls'. */
+static void
+check_automaton_usage ()
+{
+ decl_t decl;
+ int i;
+
+ for (i = 0; i < description->decls_num; i++)
+ {
+ decl = description->decls [i];
+ if (decl->mode == dm_automaton
+ && !decl->decl.automaton.automaton_is_used)
+ {
+ if (!w_flag)
+ error ("automaton `%s' is not used", decl->decl.automaton.name);
+ else
+ warning ("automaton `%s' is not used", decl->decl.automaton.name);
+ }
+ }
+}
+
+/* The following recursive function processes all regexp in order to
+ mark the units and reservations it refers to as used and to report
+ undeclared names. The function may replace a unit regexp by a newly
+ created reserv regexp; reserv regexps do not exist before the
+ function call. */
+static regexp_t
+process_regexp (regexp)
+ regexp_t regexp;
+{
+ decl_t decl_in_table;
+ regexp_t new_regexp;
+ int i;
+
+ if (regexp->mode == rm_unit)
+ {
+ decl_in_table = find_decl (regexp->regexp.unit.name);
+ if (decl_in_table == NULL)
+ error ("undeclared unit or reservation `%s'",
+ regexp->regexp.unit.name);
+ else if (decl_in_table->mode == dm_unit)
+ {
+ decl_in_table->decl.unit.unit_is_used = 1;
+ regexp->regexp.unit.unit_decl = &decl_in_table->decl.unit;
+ }
+ else if (decl_in_table->mode == dm_reserv)
+ {
+ decl_in_table->decl.reserv.reserv_is_used = 1;
+ new_regexp = create_node (sizeof (struct regexp));
+ new_regexp->mode = rm_reserv;
+ new_regexp->pos = regexp->pos;
+ new_regexp->regexp.reserv.name = regexp->regexp.unit.name;
+ new_regexp->regexp.reserv.reserv_decl = &decl_in_table->decl.reserv;
+ regexp = new_regexp;
+ }
+ else
+ abort ();
+ }
+ else if (regexp->mode == rm_sequence)
+ for (i = 0; i < regexp->regexp.sequence.regexps_num; i++)
+ regexp->regexp.sequence.regexps [i]
+ = process_regexp (regexp->regexp.sequence.regexps [i]);
+ else if (regexp->mode == rm_allof)
+ for (i = 0; i < regexp->regexp.allof.regexps_num; i++)
+ regexp->regexp.allof.regexps [i]
+ = process_regexp (regexp->regexp.allof.regexps [i]);
+ else if (regexp->mode == rm_oneof)
+ for (i = 0; i < regexp->regexp.oneof.regexps_num; i++)
+ regexp->regexp.oneof.regexps [i]
+ = process_regexp (regexp->regexp.oneof.regexps [i]);
+ else if (regexp->mode == rm_repeat)
+ regexp->regexp.repeat.regexp
+ = process_regexp (regexp->regexp.repeat.regexp);
+ else if (regexp->mode != rm_nothing)
+ abort ();
+ return regexp;
+}
+
+/* The following function processes regexp of define_reservation and
+ define_insn_reservation with the aid of function
+ `process_regexp'. */
+static void
+process_regexp_decls ()
+{
+ decl_t decl;
+ int i;
+
+ for (i = 0; i < description->decls_num; i++)
+ {
+ decl = description->decls [i];
+ if (decl->mode == dm_reserv)
+ decl->decl.reserv.regexp = process_regexp (decl->decl.reserv.regexp);
+ else if (decl->mode == dm_insn_reserv)
+ decl->decl.insn_reserv.regexp
+ = process_regexp (decl->decl.insn_reserv.regexp);
+ }
+}
+
+/* The following function checks that each declared unit and
+ reservation is used. If one is not used, the function reports an
+ error (a warning if w_flag is set). The function must be called
+ only after `process_decls' and `process_regexp_decls'. */
+static void
+check_usage ()
+{
+ decl_t decl;
+ int i;
+
+ for (i = 0; i < description->decls_num; i++)
+ {
+ decl = description->decls [i];
+ if (decl->mode == dm_unit && !decl->decl.unit.unit_is_used)
+ {
+ if (!w_flag)
+ error ("unit `%s' is not used", decl->decl.unit.name);
+ else
+ warning ("unit `%s' is not used", decl->decl.unit.name);
+ }
+ else if (decl->mode == dm_reserv && !decl->decl.reserv.reserv_is_used)
+ {
+ if (!w_flag)
+ error ("reservation `%s' is not used", decl->decl.reserv.name);
+ else
+ warning ("reservation `%s' is not used", decl->decl.reserv.name);
+ }
+ }
+}
+
+/* The following variable value is number of reservation being
+ processed on loop recognition. */
+static int curr_loop_pass_num;
+
+/* The following recursive function returns nonzero value if REGEXP
+ refers to given decl START_DECL directly or through reservations
+ used in REGEXP. */
+static int
+loop_in_regexp (regexp, start_decl)
+ regexp_t regexp;
+ decl_t start_decl;
+{
+ int i;
+
+ if (regexp == NULL)
+ return 0;
+ if (regexp->mode == rm_unit)
+ return 0;
+ else if (regexp->mode == rm_reserv)
+ {
+ if (start_decl->mode == dm_reserv
+ && regexp->regexp.reserv.reserv_decl == &start_decl->decl.reserv)
+ return 1;
+ else if (regexp->regexp.reserv.reserv_decl->loop_pass_num
+ == curr_loop_pass_num)
+ /* declaration has been processed. */
+ return 0;
+ else
+ {
+ regexp->regexp.reserv.reserv_decl->loop_pass_num
+ = curr_loop_pass_num;
+ return loop_in_regexp (regexp->regexp.reserv.reserv_decl->regexp,
+ start_decl);
+ }
+ }
+ else if (regexp->mode == rm_sequence)
+ {
+ for (i = 0; i < regexp->regexp.sequence.regexps_num; i++)
+ if (loop_in_regexp (regexp->regexp.sequence.regexps [i], start_decl))
+ return 1;
+ return 0;
+ }
+ else if (regexp->mode == rm_allof)
+ {
+ for (i = 0; i < regexp->regexp.allof.regexps_num; i++)
+ if (loop_in_regexp (regexp->regexp.allof.regexps [i], start_decl))
+ return 1;
+ return 0;
+ }
+ else if (regexp->mode == rm_oneof)
+ {
+ for (i = 0; i < regexp->regexp.oneof.regexps_num; i++)
+ if (loop_in_regexp (regexp->regexp.oneof.regexps [i], start_decl))
+ return 1;
+ return 0;
+ }
+ else if (regexp->mode == rm_repeat)
+ return loop_in_regexp (regexp->regexp.repeat.regexp, start_decl);
+ else
+ {
+ if (regexp->mode != rm_nothing)
+ abort ();
+ return 0;
+ }
+}
+
+/* The following function reports errors "cycle in definition ...". The
+ function uses function `loop_in_regexp' for that. */
+static void
+check_loops_in_regexps ()
+{
+ decl_t decl;
+ int i;
+
+ for (i = 0; i < description->decls_num; i++)
+ {
+ decl = description->decls [i];
+ if (decl->mode == dm_reserv)
+ decl->decl.reserv.loop_pass_num = 0;
+ }
+ for (i = 0; i < description->decls_num; i++)
+ {
+ decl = description->decls [i];
+ curr_loop_pass_num = i;
+
+ if (decl->mode == dm_reserv)
+ {
+ decl->decl.reserv.loop_pass_num = curr_loop_pass_num;
+ if (loop_in_regexp (decl->decl.reserv.regexp, decl))
+ {
+ if (decl->decl.reserv.regexp == NULL)
+ abort ();
+ error ("cycle in definition of reservation `%s'",
+ decl->decl.reserv.name);
+ }
+ }
+ }
+}
+
+/* The function recursively processes IR of reservation REGEXP starting
+ on START_CYCLE, updates max_occ_cycle_num of the units it reserves,
+ and returns the resulting cycle number. */
+static int
+process_regexp_cycles (regexp, start_cycle)
+ regexp_t regexp;
+ int start_cycle;
+{
+ int i;
+
+ if (regexp->mode == rm_unit)
+ {
+ if (regexp->regexp.unit.unit_decl->max_occ_cycle_num < start_cycle)
+ regexp->regexp.unit.unit_decl->max_occ_cycle_num = start_cycle;
+ return start_cycle;
+ }
+ else if (regexp->mode == rm_reserv)
+ return process_regexp_cycles (regexp->regexp.reserv.reserv_decl->regexp,
+ start_cycle);
+ else if (regexp->mode == rm_repeat)
+ {
+ for (i = 0; i < regexp->regexp.repeat.repeat_num; i++)
+ start_cycle = process_regexp_cycles (regexp->regexp.repeat.regexp,
+ start_cycle) + 1;
+ return start_cycle;
+ }
+ else if (regexp->mode == rm_sequence)
+ {
+ for (i = 0; i < regexp->regexp.sequence.regexps_num; i++)
+ start_cycle
+ = process_regexp_cycles (regexp->regexp.sequence.regexps [i],
+ start_cycle) + 1;
+ return start_cycle;
+ }
+ else if (regexp->mode == rm_allof)
+ {
+ int finish_cycle = 0;
+ int cycle;
+
+ for (i = 0; i < regexp->regexp.allof.regexps_num; i++)
+ {
+ cycle = process_regexp_cycles (regexp->regexp.allof.regexps [i],
+ start_cycle);
+ if (finish_cycle < cycle)
+ finish_cycle = cycle;
+ }
+ return finish_cycle;
+ }
+ else if (regexp->mode == rm_oneof)
+ {
+ int finish_cycle = 0;
+ int cycle;
+
+ for (i = 0; i < regexp->regexp.oneof.regexps_num; i++)
+ {
+ cycle = process_regexp_cycles (regexp->regexp.oneof.regexps [i],
+ start_cycle);
+ if (finish_cycle < cycle)
+ finish_cycle = cycle;
+ }
+ return finish_cycle;
+ }
+ else
+ {
+ if (regexp->mode != rm_nothing)
+ abort ();
+ return start_cycle;
+ }
+}
+
+/* The following function is called only for correct program. The
+ function defines max reservation of insns in cycles. */
+static void
+evaluate_max_reserv_cycles ()
+{
+ int max_insn_cycles_num;
+ decl_t decl;
+ int i;
+
+ description->max_insn_reserv_cycles = 0;
+ for (i = 0; i < description->decls_num; i++)
+ {
+ decl = description->decls [i];
+ if (decl->mode == dm_insn_reserv)
+ {
+ max_insn_cycles_num
+ = process_regexp_cycles (decl->decl.insn_reserv.regexp, 0);
+ if (description->max_insn_reserv_cycles < max_insn_cycles_num)
+ description->max_insn_reserv_cycles = max_insn_cycles_num;
+ }
+ }
+}
+
+/* The following function calls functions for checking all
+ description. */
+static void
+check_all_description ()
+{
+ process_decls ();
+ check_automaton_usage ();
+ process_regexp_decls ();
+ check_usage ();
+ check_loops_in_regexps ();
+ if (!have_error)
+ evaluate_max_reserv_cycles ();
+}
+
+\f
+
+/* The page contains abstract data `ticker'. This data is used to
+ report time of different phases of building automata. It is
+ possible to write a description for which building the automata
+ takes several minutes even on a fast machine. */
+
+/* The following function creates ticker and makes it active. */
+static ticker_t
+create_ticker ()
+{
+ ticker_t ticker;
+
+ ticker.modified_creation_time = get_run_time ();
+ ticker.incremented_off_time = 0;
+ return ticker;
+}
+
+/* The following function switches off given ticker. */
+static void
+ticker_off (ticker)
+ ticker_t *ticker;
+{
+ if (ticker->incremented_off_time == 0)
+ ticker->incremented_off_time = get_run_time () + 1;
+}
+
+/* The following function switches on given ticker. */
+static void
+ticker_on (ticker)
+ ticker_t *ticker;
+{
+ if (ticker->incremented_off_time != 0)
+ {
+ ticker->modified_creation_time
+ += get_run_time () - ticker->incremented_off_time + 1;
+ ticker->incremented_off_time = 0;
+ }
+}
+
+/* The following function returns the active time of given ticker in
+ units of `get_run_time' (time while switched off is not counted). */
+static int
+active_time (ticker)
+ ticker_t ticker;
+{
+ if (ticker.incremented_off_time != 0)
+ return ticker.incremented_off_time - 1 - ticker.modified_creation_time;
+ else
+ return get_run_time () - ticker.modified_creation_time;
+}
+
+/* The following function prints active time of given ticker into
+ file F as seconds with six digits after the decimal point.
+ NOTE(review): the example below is about `active_time_string', whose
+ result exists only until its next call, so this code is not correct
+
+ printf ("parser time: %s\ngeneration time: %s\n",
+ active_time_string (parser_ticker),
+ active_time_string (generation_ticker));
+
+ Correct code has to be the following
+
+ printf ("parser time: %s\n", active_time_string (parser_ticker));
+ printf ("generation time: %s\n",
+ active_time_string (generation_ticker));
+
+*/
+static void
+print_active_time (f, ticker)
+ FILE *f;
+ ticker_t ticker;
+{
+ int msecs;
+
+ msecs = active_time (ticker);
+ fprintf (f, "%d.%06d", msecs / 1000000, msecs % 1000000);
+}
+
+\f
+
+/* The following variable value is number of automata which are
+ really being created. This value is defined on the base of
+ argument of option `-split'. If the variable has zero value the
+ number of automata is defined by the constructions `%automaton'.
+ This case occurs when option `-split' is absent or has zero
+ argument. If constructions `define_automaton' is absent only one
+ automaton is created. */
+static int automata_num;
+
+/* The following variable values are times of
+ o transformation of regular expressions
+ o building NDFA (DFA if !ndfa_flag)
+ o NDFA -> DFA (simply the same automaton if !ndfa_flag)
+ o DFA minimization
+ o building insn equivalence classes
+ o all previous ones
+ o code output */
+static ticker_t transform_time;
+static ticker_t NDFA_time;
+static ticker_t NDFA_to_DFA_time;
+static ticker_t minimize_time;
+static ticker_t equiv_time;
+static ticker_t automaton_generation_time;
+static ticker_t output_time;
+
+/* The following variable values are times of
+ all checking
+ all generation
+ all pipeline hazard translator work */
+static ticker_t check_time;
+static ticker_t generation_time;
+static ticker_t all_time;
+
+\f
+
+/* Pseudo insn decl which denotes advancing cycle. */
+static decl_t advance_cycle_insn_decl;
+static void
+add_advance_cycle_insn_decl ()
+{
+ advance_cycle_insn_decl = create_node (sizeof (struct decl));
+ advance_cycle_insn_decl->mode = dm_insn_reserv;
+ advance_cycle_insn_decl->pos = no_pos;
+ advance_cycle_insn_decl->decl.insn_reserv.regexp = NULL;
+ advance_cycle_insn_decl->decl.insn_reserv.name = (char *) "$advance_cycle";
+ advance_cycle_insn_decl->decl.insn_reserv.insn_num = description->insns_num;
+ description->decls [description->decls_num] = advance_cycle_insn_decl;
+ description->decls_num++;
+ description->insns_num++;
+ num_dfa_decls++;
+}
+
+\f
+/* Abstract data `alternative states' which represents
+ nondeterministic nature of the description (see comments for
+ structures alt_state and state). */
+
+/* List of free alt states. */
+static alt_state_t first_free_alt_state;
+
+#ifndef NDEBUG
+/* The following variable is maximal number of allocated nodes
+ alt_state. */
+static int allocated_alt_states_num = 0;
+#endif
+
+/* The following function returns free node alt_state. It may be newly
+ allocated node or node freed earlier. */
+static alt_state_t
+get_free_alt_state ()
+{
+ alt_state_t result;
+
+ if (first_free_alt_state != NULL)
+ {
+ result = first_free_alt_state;
+ first_free_alt_state = first_free_alt_state->next_alt_state;
+ }
+ else
+ {
+#ifndef NDEBUG
+ allocated_alt_states_num++;
+#endif
+ result = create_node (sizeof (struct alt_state));
+ }
+ result->state = NULL;
+ result->next_alt_state = NULL;
+ result->next_sorted_alt_state = NULL;
+ return result;
+}
+
+/* The function frees node ALT_STATE. */
+static void
+free_alt_state (alt_state)
+ alt_state_t alt_state;
+{
+ if (alt_state == NULL)
+ return;
+ alt_state->next_alt_state = first_free_alt_state;
+ first_free_alt_state = alt_state;
+}
+
+/* The function frees list started with node ALT_STATES_LIST. */
+static void
+free_alt_states (alt_states_list)
+ alt_state_t alt_states_list;
+{
+ alt_state_t curr_alt_state;
+ alt_state_t next_alt_state;
+
+ for (curr_alt_state = alt_states_list;
+ curr_alt_state != NULL;
+ curr_alt_state = next_alt_state)
+ {
+ next_alt_state = curr_alt_state->next_alt_state;
+ free_alt_state (curr_alt_state);
+ }
+}
+
+/* The function compares unique numbers of alt states. */
+static int
+alt_state_cmp (alt_state_ptr_1, alt_state_ptr_2)
+ const void *alt_state_ptr_1;
+ const void *alt_state_ptr_2;
+{
+ if ((*(alt_state_t *) alt_state_ptr_1)->state->unique_num
+ == (*(alt_state_t *) alt_state_ptr_2)->state->unique_num)
+ return 0;
+ else if ((*(alt_state_t *) alt_state_ptr_1)->state->unique_num
+ < (*(alt_state_t *) alt_state_ptr_2)->state->unique_num)
+ return -1;
+ else
+ return 1;
+}
+
+/* The function sorts ALT_STATES_LIST and removes duplicated alt
+ states from the list. The comparison key is alt state unique
+ number. */
+static alt_state_t
+uniq_sort_alt_states (alt_states_list)
+ alt_state_t alt_states_list;
+{
+ alt_state_t curr_alt_state;
+ vla_ptr_t alt_states;
+ size_t i;
+ size_t prev_unique_state_ind;
+ alt_state_t result;
+ alt_state_t *result_ptr;
+
+ VLA_PTR_CREATE (alt_states, 150, "alt_states");
+ for (curr_alt_state = alt_states_list;
+ curr_alt_state != NULL;
+ curr_alt_state = curr_alt_state->next_alt_state)
+ VLA_PTR_ADD (alt_states, curr_alt_state);
+ qsort (VLA_PTR_BEGIN (alt_states), VLA_PTR_LENGTH (alt_states),
+ sizeof (alt_state_t), alt_state_cmp);
+ if (VLA_PTR_LENGTH (alt_states) == 0)
+ result = NULL;
+ else
+ {
+ result_ptr = VLA_PTR_BEGIN (alt_states);
+ prev_unique_state_ind = 0;
+ for (i = 1; i < VLA_PTR_LENGTH (alt_states); i++)
+ if (result_ptr [prev_unique_state_ind]->state != result_ptr [i]->state)
+ {
+ prev_unique_state_ind++;
+ result_ptr [prev_unique_state_ind] = result_ptr [i];
+ }
+#if 0
+ for (i = prev_unique_state_ind + 1; i < VLA_PTR_LENGTH (alt_states); i++)
+ free_alt_state (result_ptr [i]);
+#endif
+ VLA_PTR_SHORTEN (alt_states, i - prev_unique_state_ind - 1);
+ result_ptr = VLA_PTR_BEGIN (alt_states);
+ for (i = 1; i < VLA_PTR_LENGTH (alt_states); i++)
+ result_ptr [i - 1]->next_sorted_alt_state = result_ptr [i];
+ result_ptr [i - 1]->next_sorted_alt_state = NULL;
+ result = *result_ptr;
+ }
+ VLA_PTR_DELETE (alt_states);
+ return result;
+}
+
+/* The function checks equality of alt state lists. Remember that the
+ lists must be already sorted by the previous function. */
+static int
+alt_states_eq (alt_states_1, alt_states_2)
+ alt_state_t alt_states_1;
+ alt_state_t alt_states_2;
+{
+ while (alt_states_1 != NULL && alt_states_2 != NULL
+ && alt_state_cmp (&alt_states_1, &alt_states_2) == 0)
+ {
+ alt_states_1 = alt_states_1->next_sorted_alt_state;
+ alt_states_2 = alt_states_2->next_sorted_alt_state;
+ }
+ return alt_states_1 == alt_states_2;
+}
+
+/* Initialization of the abstract data. */
+static void
+initiate_alt_states ()
+{
+ first_free_alt_state = NULL;
+}
+
+/* Finishing work with the abstract data. */
+static void
+finish_alt_states ()
+{
+}
+
+\f
+
+/* The page contains macros for work with bits strings. We could use
+ standard gcc bitmap or sbitmap but it would result in difficulties
+ of building canadian cross. */
+
+/* Set bit number bitno in the bit string. The macro is not side
+ effect proof. */
+#define SET_BIT(bitstring, bitno) \
+ (((char *) (bitstring)) [(bitno) / CHAR_BIT] |= 1 << (bitno) % CHAR_BIT)
+
+/* Test if bit number bitno in the bitstring is set. The macro is not
+ side effect proof. */
+#define TEST_BIT(bitstring, bitno) \
+ (((char *) (bitstring)) [(bitno) / CHAR_BIT] >> (bitno) % CHAR_BIT & 1)
+
+\f
+
+/* This page contains abstract data `state'. */
+
+/* Maximal length of reservations in cycles (> 1). */
+static int max_cycles_num;
+
+/* Number of set elements (see type set_el_t) needed for
+ representation of one cycle reservation. It depends on the number
+ of units. */
+static int els_in_cycle_reserv;
+
+/* Number of set elements (see type set_el_t) needed for
+ representation of maximal length reservation. Deterministic
+ reservation is stored as set (bit string) of length equal to the
+ variable value * number of bits in set_el_t. */
+static int els_in_reservs;
+
+/* VLA for representation of array of pointers to unit
+ declarations. */
+static vla_ptr_t units_container;
+
+/* The start address of the array. */
+static struct unit_decl **units_array;
+
+/* Empty reservation of maximal length. */
+static reserv_sets_t empty_reserv;
+
+/* The state table itself is represented by the following variable. */
+static htab_t state_table;
+
+/* VLA for representation of array of pointers to free nodes
+ `state'. */
+static vla_ptr_t free_states;
+
+static int curr_unique_state_num;
+
+#ifndef NDEBUG
+/* The following variable is maximal number of allocated nodes
+ `state'. */
+static int allocated_states_num = 0;
+#endif
+
+/* Allocate new reservation set. */
+static reserv_sets_t
+alloc_empty_reserv_sets ()
+{
+ reserv_sets_t result;
+
+ obstack_blank (&irp, els_in_reservs * sizeof (set_el_t));
+ result = (reserv_sets_t) obstack_base (&irp);
+ obstack_finish (&irp);
+ memset (result, 0, els_in_reservs * sizeof (set_el_t));
+ return result;
+}
+
+/* Hash value of reservation set. */
+static unsigned
+reserv_sets_hash_value (reservs)
+ reserv_sets_t reservs;
+{
+ unsigned int hash_value;
+ int reservs_num;
+ set_el_t *reserv_ptr;
+
+ hash_value = 0;
+ reservs_num = els_in_reservs;
+ reserv_ptr = reservs;
+ while (reservs_num != 0)
+ {
+ reservs_num--;
+ hash_value = ((hash_value >> (sizeof (unsigned) - 1) * CHAR_BIT)
+ | (hash_value << CHAR_BIT)) + *reserv_ptr;
+ reserv_ptr++;
+ }
+ return hash_value;
+}
+
+/* Comparison of given reservation sets. */
+static int
+reserv_sets_cmp (reservs_1, reservs_2)
+ reserv_sets_t reservs_1;
+ reserv_sets_t reservs_2;
+{
+ int reservs_num;
+ set_el_t *reserv_ptr_1;
+ set_el_t *reserv_ptr_2;
+
+ if (reservs_1 == NULL || reservs_2 == NULL)
+ abort ();
+ reservs_num = els_in_reservs;
+ reserv_ptr_1 = reservs_1;
+ reserv_ptr_2 = reservs_2;
+ while (reservs_num != 0 && *reserv_ptr_1 == *reserv_ptr_2)
+ {
+ reservs_num--;
+ reserv_ptr_1++;
+ reserv_ptr_2++;
+ }
+ if (reservs_num == 0)
+ return 0;
+ else if (*reserv_ptr_1 < *reserv_ptr_2)
+ return -1;
+ else
+ return 1;
+}
+
+/* The function checks equality of the reservation sets. */
+static int
+reserv_sets_eq (reservs_1, reservs_2)
+ reserv_sets_t reservs_1;
+ reserv_sets_t reservs_2;
+{
+ return reserv_sets_cmp (reservs_1, reservs_2) == 0;
+}
+
+/* Set up in the reservation set that unit with UNIT_NUM is used on
+ CYCLE_NUM. */
+static void
+set_unit_reserv (reservs, cycle_num, unit_num)
+ reserv_sets_t reservs;
+ int cycle_num;
+ int unit_num;
+{
+ if (cycle_num >= max_cycles_num)
+ abort ();
+ SET_BIT (reservs, cycle_num * els_in_cycle_reserv
+ * sizeof (set_el_t) * CHAR_BIT + unit_num);
+}
+
+/* Test in the reservation set RESERVS whether unit with UNIT_NUM is
+ used on CYCLE_NUM. */
+static int
+test_unit_reserv (reservs, cycle_num, unit_num)
+ reserv_sets_t reservs;
+ int cycle_num;
+ int unit_num;
+{
+ if (cycle_num >= max_cycles_num)
+ abort ();
+ return TEST_BIT (reservs, cycle_num * els_in_cycle_reserv
+ * sizeof (set_el_t) * CHAR_BIT + unit_num);
+}
+
+/* The function checks that the reservation set contains no unit
+ reservation. */
+static int
+it_is_empty_reserv_sets (operand)
+ reserv_sets_t operand;
+{
+ set_el_t *reserv_ptr;
+ int reservs_num;
+
+ if (operand == NULL)
+ abort ();
+ for (reservs_num = els_in_reservs, reserv_ptr = operand;
+ reservs_num != 0;
+ reserv_ptr++, reservs_num--)
+ if (*reserv_ptr != 0)
+ return 0;
+ return 1;
+}
+
+/* The function checks that the reservation sets are intersected,
+ i.e. there is a unit reservation on a cycle in both reservation
+ sets, or they conflict through exclusion, presence or absence sets. */
+static int
+reserv_sets_are_intersected (operand_1, operand_2)
+ reserv_sets_t operand_1;
+ reserv_sets_t operand_2;
+{
+ set_el_t *el_ptr_1;
+ set_el_t *el_ptr_2;
+ set_el_t *cycle_ptr_1;
+ set_el_t *cycle_ptr_2;
+ int nonzero_p;
+
+ if (operand_1 == NULL || operand_2 == NULL)
+ abort ();
+ for (el_ptr_1 = operand_1, el_ptr_2 = operand_2;
+ el_ptr_1 < operand_1 + els_in_reservs;
+ el_ptr_1++, el_ptr_2++)
+ if (*el_ptr_1 & *el_ptr_2)
+ return 1;
+ for (cycle_ptr_1 = operand_1, cycle_ptr_2 = operand_2;
+ cycle_ptr_1 < operand_1 + els_in_reservs;
+ cycle_ptr_1 += els_in_cycle_reserv, cycle_ptr_2 += els_in_cycle_reserv)
+ {
+ for (el_ptr_1 = cycle_ptr_1, el_ptr_2 = get_excl_set (cycle_ptr_2);
+ el_ptr_1 < cycle_ptr_1 + els_in_cycle_reserv;
+ el_ptr_1++, el_ptr_2++)
+ if (*el_ptr_1 & *el_ptr_2)
+ return 1;
+ nonzero_p = 0;
+ for (el_ptr_1 = cycle_ptr_1,
+ el_ptr_2 = get_presence_absence_set (cycle_ptr_2, 1);
+ el_ptr_1 < cycle_ptr_1 + els_in_cycle_reserv;
+ el_ptr_1++, el_ptr_2++)
+ if (*el_ptr_1 & *el_ptr_2)
+ break;
+ else if (*el_ptr_2 != 0)
+ nonzero_p = 1;
+ if (nonzero_p && el_ptr_1 >= cycle_ptr_1 + els_in_cycle_reserv)
+ return 1;
+ for (el_ptr_1 = cycle_ptr_1,
+ el_ptr_2 = get_presence_absence_set (cycle_ptr_2, 0);
+ el_ptr_1 < cycle_ptr_1 + els_in_cycle_reserv;
+ el_ptr_1++, el_ptr_2++)
+ /* It looks like code for exclusion but exclusion set is
+ made as symmetric relation preliminary. */
+ if (*el_ptr_1 & *el_ptr_2)
+ return 1;
+ }
+ return 0;
+}
+
+/* The function sets up RESULT bits by bits of OPERAND shifted on one
+ cpu cycle. The remaining bits of RESULT (representing the last
+ cycle unit reservations) are not changed. */
+static void
+reserv_sets_shift (result, operand)
+ reserv_sets_t result;
+ reserv_sets_t operand;
+{
+ int i;
+
+ if (result == NULL || operand == NULL || result == operand)
+ abort ();
+ for (i = els_in_cycle_reserv; i < els_in_reservs; i++)
+ result [i - els_in_cycle_reserv] = operand [i];
+}
+
+/* OR of the reservation sets. */
+static void
+reserv_sets_or (result, operand_1, operand_2)
+ reserv_sets_t result;
+ reserv_sets_t operand_1;
+ reserv_sets_t operand_2;
+{
+ set_el_t *el_ptr_1;
+ set_el_t *el_ptr_2;
+ set_el_t *result_set_el_ptr;
+
+ if (result == NULL || operand_1 == NULL || operand_2 == NULL)
+ abort ();
+ for (el_ptr_1 = operand_1, el_ptr_2 = operand_2, result_set_el_ptr = result;
+ el_ptr_1 < operand_1 + els_in_reservs;
+ el_ptr_1++, el_ptr_2++, result_set_el_ptr++)
+ *result_set_el_ptr = *el_ptr_1 | *el_ptr_2;
+}
+
+/* AND of the reservation sets. */
+static void
+reserv_sets_and (result, operand_1, operand_2)
+ reserv_sets_t result;
+ reserv_sets_t operand_1;
+ reserv_sets_t operand_2;
+{
+ set_el_t *el_ptr_1;
+ set_el_t *el_ptr_2;
+ set_el_t *result_set_el_ptr;
+
+ if (result == NULL || operand_1 == NULL || operand_2 == NULL)
+ abort ();
+ for (el_ptr_1 = operand_1, el_ptr_2 = operand_2, result_set_el_ptr = result;
+ el_ptr_1 < operand_1 + els_in_reservs;
+ el_ptr_1++, el_ptr_2++, result_set_el_ptr++)
+ *result_set_el_ptr = *el_ptr_1 & *el_ptr_2;
+}
+
+/* The function outputs string representation of units reservation on
+ cycle START_CYCLE in the reservation set. The function uses repeat
+ construction if REPETITION_NUM > 1. */
+static void
+output_cycle_reservs (f, reservs, start_cycle, repetition_num)
+ FILE *f;
+ reserv_sets_t reservs;
+ int start_cycle;
+ int repetition_num;
+{
+ int unit_num;
+ int reserved_units_num;
+
+ reserved_units_num = 0;
+ for (unit_num = 0; unit_num < description->units_num; unit_num++)
+ if (TEST_BIT (reservs, start_cycle * els_in_cycle_reserv
+ * sizeof (set_el_t) * CHAR_BIT + unit_num))
+ reserved_units_num++;
+ if (repetition_num <= 0)
+ abort ();
+ if (repetition_num != 1 && reserved_units_num > 1)
+ fprintf (f, "(");
+ reserved_units_num = 0;
+ for (unit_num = 0;
+ unit_num < description->units_num;
+ unit_num++)
+ if (TEST_BIT (reservs, start_cycle * els_in_cycle_reserv
+ * sizeof (set_el_t) * CHAR_BIT + unit_num))
+ {
+ if (reserved_units_num != 0)
+ fprintf (f, "+");
+ reserved_units_num++;
+ fprintf (f, "%s", units_array [unit_num]->name);
+ }
+ if (reserved_units_num == 0)
+ fprintf (f, NOTHING_NAME);
+ if (repetition_num <= 0)
+ abort ();
+ if (reserved_units_num != 0 && repetition_num != 1)
+ {
+ if (reserved_units_num > 1)
+ fprintf (f, ")");
+ fprintf (f, "*%d", repetition_num);
+ }
+}
+
+/* The function outputs string representation of units reservation in
+ the reservation set. */
+static void
+output_reserv_sets (f, reservs)
+ FILE *f;
+ reserv_sets_t reservs;
+{
+ int start_cycle = 0;
+ int cycle;
+ int repetition_num;
+
+ repetition_num = 0;
+ for (cycle = 0; cycle < max_cycles_num; cycle++)
+ if (repetition_num == 0)
+ {
+ repetition_num++;
+ start_cycle = cycle;
+ }
+ else if (memcmp
+ ((char *) reservs + start_cycle * els_in_cycle_reserv
+ * sizeof (set_el_t),
+ (char *) reservs + cycle * els_in_cycle_reserv
+ * sizeof (set_el_t),
+ els_in_cycle_reserv * sizeof (set_el_t)) == 0)
+ repetition_num++;
+ else
+ {
+ if (start_cycle != 0)
+ fprintf (f, ", ");
+ output_cycle_reservs (f, reservs, start_cycle, repetition_num);
+ repetition_num = 1;
+ start_cycle = cycle;
+ }
+ if (start_cycle < max_cycles_num)
+ {
+ if (start_cycle != 0)
+ fprintf (f, ", ");
+ output_cycle_reservs (f, reservs, start_cycle, repetition_num);
+ }
+}
+
+/* The following function returns free node state for AUTOMATON. It
+ may be new allocated node or node freed earlier. The function also
+ allocates reservation set if WITH_RESERVS has nonzero value. */
+static state_t
+get_free_state (with_reservs, automaton)
+ int with_reservs;
+ automaton_t automaton;
+{
+ state_t result;
+
+ if (max_cycles_num <= 0 || automaton == NULL)
+ abort ();
+ if (VLA_PTR_LENGTH (free_states) != 0)
+ {
+ result = VLA_PTR (free_states, VLA_PTR_LENGTH (free_states) - 1);
+ VLA_PTR_SHORTEN (free_states, 1);
+ result->automaton = automaton;
+ result->first_out_arc = NULL;
+ result->it_was_placed_in_stack_for_NDFA_forming = 0;
+ result->it_was_placed_in_stack_for_DFA_forming = 0;
+ result->component_states = NULL;
+ result->longest_path_length = UNDEFINED_LONGEST_PATH_LENGTH;
+ }
+ else
+ {
+#ifndef NDEBUG
+ allocated_states_num++;
+#endif
+ result = create_node (sizeof (struct state));
+ result->automaton = automaton;
+ result->first_out_arc = NULL;
+ result->unique_num = curr_unique_state_num;
+ result->longest_path_length = UNDEFINED_LONGEST_PATH_LENGTH;
+&nb