2003-03-15 Aldy Hernandez <aldyh@redhat.com>

[pf3gnuchains/gcc-fork.git] / gcc / doc / md.texi
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi

index 8fa221d..da7e38b 100644 (file)
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -1,5 +1,5 @@
-@c Copyright (C) 1988, 1989, 1992, 1993, 1994, 1996, 1998, 2000, 2001
-@c Free Software Foundation, Inc.
+@c Copyright (C) 1988, 1989, 1992, 1993, 1994, 1996, 1998, 1999, 2000, 2001,
+@c 2002, 2003 Free Software Foundation, Inc.
  @c This is part of the GCC manual.
  @c For copying conditions, see the file gcc.texi.
  
@@ -38,6 +38,7 @@ See the next chapter for information on the C header file.
  * Expander Definitions::Generating a sequence of several RTL insns
                            for a standard operation.
  * Insn Splitting::      Splitting Instructions into Multiple Instructions.
+* Including Patterns::      Including Patterns in Machine Descriptions.
  * Peephole Definitions::Defining machine-specific peephole optimizations.
  * Insn Attributes::     Specifying the value of attributes for generated insns.
  * Conditional Execution::Generating @code{define_insn} patterns for
@@ -157,7 +158,9 @@ available in a particular run.
  For nameless patterns, the condition is applied only when matching an
  individual insn, and only after the insn has matched the pattern's
  recognition template.  The insn's operands may be found in the vector
-@code{operands}.
+@code{operands}.  For an insn where the condition has once matched, it
+can't be used to control register allocation, for example by excluding
+certain hard registers or hard register combinations.
  
  @item
  The @dfn{output template}: a string that says how to output matching
@@ -184,10 +187,10 @@ Here is an actual example of an instruction pattern, for the 68000/68020.
          (match_operand:SI 0 "general_operand" "rm"))]
    ""
    "*
-@{ 
+@{
    if (TARGET_68020 || ! ADDRESS_REG_P (operands[0]))
      return \"tstl %0\";
-  return \"cmpl #0,%0\"; 
+  return \"cmpl #0,%0\";
  @}")
  @end example
  
@@ -199,10 +202,10 @@ This can also be written using braced strings:
    [(set (cc0)
          (match_operand:SI 0 "general_operand" "rm"))]
    ""
-@{ 
+@{
    if (TARGET_68020 || ! ADDRESS_REG_P (operands[0]))
      return "tstl %0";
-  return "cmpl #0,%0"; 
+  return "cmpl #0,%0";
  @})
  @end example
  
@@ -448,8 +451,6 @@ those listed in the @code{match_parallel}.
  A typical use of @code{match_parallel} is to match load and store
  multiple expressions, which can contain a variable number of elements
  in a @code{parallel}.  For example,
-@c the following is *still* going over.  need to change the code.
-@c also need to work on grouping of this example.  --mew 1feb93
  
  @smallexample
  (define_insn ""
@@ -463,7 +464,7 @@ in a @code{parallel}.  For example,
  @end smallexample
  
  This example comes from @file{a29k.md}.  The function
-@code{load_multiple_operations} is defined in @file{a29k.c} and checks
+@code{load_multiple_operation} is defined in @file{a29k.c} and checks
  that subsequent elements in the @code{parallel} are the same as the
  @code{set} in the pattern, except that they are referencing subsequent
  registers and memory locations.
@@ -702,8 +703,8 @@ as follows, having the output control string start with a @samp{@@}:
  @end ifset
  
  @c Most of this node appears by itself (in a different place) even
-@c when the INTERNALS flag is clear.  Passages that require the full
-@c manual's context are conditionalized to appear only in the full manual.
+@c when the INTERNALS flag is clear.  Passages that require the internals
+@c manual's context are conditionalized to appear only in the internals manual.
  @ifset INTERNALS
  @node Constraints
  @section Operand Constraints
@@ -844,8 +845,8 @@ that of the host machine (on which the compiler is running).
  
  @cindex @samp{F} in constraint
  @item @samp{F}
-An immediate floating operand (expression code @code{const_double}) is
-allowed.
+An immediate floating operand (expression code @code{const_double} or
+@code{const_vector}) is allowed.
  
  @cindex @samp{G} in constraint
  @cindex @samp{H} in constraint
@@ -896,6 +897,13 @@ An operand that matches the specified operand number is allowed.  If a
  digit is used together with letters within the same alternative, the
  digit should come last.
  
+This number is allowed to be more than a single digit.  If multiple
+digits are encountered consecutively, they are interpreted as a single
+decimal integer.  There is scant chance for ambiguity, since to date
+it has never been desirable that @samp{10} be interpreted as matching
+either operand 1 @emph{or} operand 0.  Should this be desired, one
+can use multiple alternatives instead.
+
  @cindex matching constraint
  @cindex constraint, matching
  This is called a @dfn{matching constraint} and what it really means is
@@ -957,7 +965,7 @@ The machine description macro @code{REG_CLASS_FROM_LETTER} has first
  cut at the otherwise unused letters.  If it evaluates to @code{NO_REGS},
  then @code{EXTRA_CONSTRAINT} is evaluated.
  
-A typical use for @code{EXTRA_CONSTRANT} would be to distinguish certain
+A typical use for @code{EXTRA_CONSTRAINT} would be to distinguish certain
  types of memory references that affect other insn operands.
  @end ifset
  @end table
@@ -1248,6 +1256,8 @@ instruction is defined:
    @dots{})
  @end smallexample
  @end ifset
+GCC can only handle one commutative pair in an asm; if you use more, 
+the compiler may fail.
  
  @cindex @samp{#} in constraint
  @item #
@@ -1255,13 +1265,13 @@ Says that all following characters, up to the next comma, are to be
  ignored as a constraint.  They are significant only for choosing
  register preferences.
  
-@ifset INTERNALS
  @cindex @samp{*} in constraint
  @item *
  Says that the following character should be ignored when choosing
  register preferences.  @samp{*} has no effect on the meaning of the
  constraint as a constraint, and no effect on reloading.
  
+@ifset INTERNALS
  Here is an example: the 68000 has an instruction to sign-extend a
  halfword in a data register, and can also sign-extend a value by
  copying it into an address register.  While either kind of register is
@@ -1295,12 +1305,12 @@ general-purpose registers respectively; @pxref{Simple Constraints}), and
  @samp{I}, usually the letter indicating the most common
  immediate-constant format.
  
-For each machine architecture, the @file{config/@var{machine}.h} file
-defines additional constraints.  These constraints are used by the
-compiler itself for instruction generation, as well as for @code{asm}
-statements; therefore, some of the constraints are not particularly
-interesting for @code{asm}.  The constraints are defined through these
-macros:
+For each machine architecture, the
+@file{config/@var{machine}/@var{machine}.h} file defines additional
+constraints.  These constraints are used by the compiler itself for
+instruction generation, as well as for @code{asm} statements; therefore,
+some of the constraints are not particularly interesting for @code{asm}.
+The constraints are defined through these macros:
  
  @table @code
  @item REG_CLASS_FROM_LETTER
@@ -1366,60 +1376,6 @@ An item in the constant pool
  A symbol in the text segment of the current file
  @end table
  
-@item AMD 29000 family---@file{a29k.h}
-@table @code
-@item l
-Local register 0
-
-@item b
-Byte Pointer (@samp{BP}) register
-
-@item q
-@samp{Q} register
-
-@item h
-Special purpose register
-
-@item A
-First accumulator register
-
-@item a
-Other accumulator register
-
-@item f
-Floating point register
-
-@item I
-Constant greater than 0, less than 0x100
-
-@item J
-Constant greater than 0, less than 0x10000
-
-@item K
-Constant whose high 24 bits are on (1)
-
-@item L
-16-bit constant whose high 8 bits are on (1)
-
-@item M
-32-bit constant whose high 16 bits are on (1)
-
-@item N
-32-bit negative constant that fits in 8 bits
-
-@item O
-The constant 0x80000000 or, on the 29050, any 32-bit constant
-whose low 16 bits are 0.
-
-@item P
-16-bit negative constant that fits in 8 bits
-
-@item G
-@itemx H
-A floating point constant (in @code{asm} statements, use the machine
-independent @samp{E} or @samp{F} instead)
-@end table
-
  @item AVR family---@file{avr.h}
  @table @code
  @item l
@@ -1598,6 +1554,10 @@ Second floating point register
  @item c
  @samp{c} register
  
+@item C
+Specifies a constant that can be easily constructed in an SSE register
+without loading it from memory.
+
  @item d
  @samp{d} register
  
@@ -1673,6 +1633,232 @@ Floating point 0
  Floating point 1
  @end table
  
+@item Intel IA-64---@file{ia64.h}
+@table @code
+@item a
+General register @code{r0} to @code{r3} for @code{addl} instruction
+
+@item b
+Branch register
+
+@item c
+Predicate register (@samp{c} as in ``conditional'')
+
+@item d
+Application register residing in M-unit
+
+@item e
+Application register residing in I-unit
+
+@item f
+Floating-point register
+
+@item m
+Memory operand.
+Remember that @samp{m} allows postincrement and postdecrement which
+require printing with @samp{%Pn} on IA-64.
+Use @samp{S} to disallow postincrement and postdecrement.
+
+@item G
+Floating-point constant 0.0 or 1.0
+
+@item I
+14-bit signed integer constant
+
+@item J
+22-bit signed integer constant
+
+@item K
+8-bit signed integer constant for logical instructions
+
+@item L
+8-bit adjusted signed integer constant for compare pseudo-ops
+
+@item M
+6-bit unsigned integer constant for shift counts
+
+@item N
+9-bit signed integer constant for load and store postincrements
+
+@item O
+The constant zero
+
+@item P
+0 or -1 for @code{dep} instruction
+
+@item Q
+Non-volatile memory for floating-point loads and stores
+
+@item R
+Integer constant in the range 1 to 4 for @code{shladd} instruction
+
+@item S
+Memory operand except postincrement and postdecrement
+@end table
+
+@item FRV---@file{frv.h}
+@table @code
+@item a
+Register in the class @code{ACC_REGS} (@code{acc0} to @code{acc7}).
+
+@item b
+Register in the class @code{EVEN_ACC_REGS} (@code{acc0} to @code{acc7}).
+
+@item c
+Register in the class @code{CC_REGS} (@code{fcc0} to @code{fcc3} and
+@code{icc0} to @code{icc3}).
+
+@item d
+Register in the class @code{GPR_REGS} (@code{gr0} to @code{gr63}).
+
+@item e
+Register in the class @code{EVEN_REGS} (@code{gr0} to @code{gr63}).
+Odd registers are excluded not in the class but through the use of a machine
+mode larger than 4 bytes.
+
+@item f
+Register in the class @code{FPR_REGS} (@code{fr0} to @code{fr63}).
+
+@item h
+Register in the class @code{FEVEN_REGS} (@code{fr0} to @code{fr63}).
+Odd registers are excluded not in the class but through the use of a machine
+mode larger than 4 bytes.
+
+@item l
+Register in the class @code{LR_REG} (the @code{lr} register).
+
+@item q
+Register in the class @code{QUAD_REGS} (@code{gr2} to @code{gr63}).
+Register numbers not divisible by 4 are excluded not in the class but through
+the use of a machine mode larger than 8 bytes.
+
+@item t
+Register in the class @code{ICC_REGS} (@code{icc0} to @code{icc3}).
+
+@item u
+Register in the class @code{FCC_REGS} (@code{fcc0} to @code{fcc3}).
+
+@item v
+Register in the class @code{ICR_REGS} (@code{cc4} to @code{cc7}).
+
+@item w
+Register in the class @code{FCR_REGS} (@code{cc0} to @code{cc3}).
+
+@item x
+Register in the class @code{QUAD_FPR_REGS} (@code{fr0} to @code{fr63}).
+Register numbers not divisible by 4 are excluded not in the class but through
+the use of a machine mode larger than 8 bytes.
+
+@item z
+Register in the class @code{SPR_REGS} (@code{lcr} and @code{lr}).
+
+@item A
+Register in the class @code{QUAD_ACC_REGS} (@code{acc0} to @code{acc7}).
+
+@item B
+Register in the class @code{ACCG_REGS} (@code{accg0} to @code{accg7}).
+
+@item C
+Register in the class @code{CR_REGS} (@code{cc0} to @code{cc7}).
+
+@item G
+Floating point constant zero
+
+@item I
+6-bit signed integer constant
+
+@item J
+10-bit signed integer constant
+
+@item L
+16-bit signed integer constant
+
+@item M
+16-bit unsigned integer constant
+
+@item N
+12-bit signed integer constant that is negative---i.e.@: in the
+range of @minus{}2048 to @minus{}1
+
+@item O
+Constant zero
+
+@item P
+12-bit signed integer constant that is greater than zero---i.e.@: in the
+range of 1 to 2047.
+
+@end table
+
+@item IP2K---@file{ip2k.h}
+@table @code
+@item a
+@samp{DP} or @samp{IP} registers (general address)
+
+@item f
+@samp{IP} register
+
+@item j
+@samp{IPL} register
+
+@item k
+@samp{IPH} register
+
+@item b
+@samp{DP} register
+
+@item y
+@samp{DPH} register
+
+@item z
+@samp{DPL} register
+
+@item q
+@samp{SP} register
+
+@item c
+@samp{DP} or @samp{SP} registers (offsettable address)
+
+@item d
+Non-pointer registers (not @samp{SP}, @samp{DP}, @samp{IP})
+
+@item u
+Non-SP registers (everything except @samp{SP})
+
+@item R
+Indirect through @samp{IP}---avoid this except for @code{QImode}, since we
+can't access extra bytes
+
+@item S
+Indirect through @samp{SP} or @samp{DP} with short displacement (0..127)
+
+@item T
+Data-section immediate value
+
+@item I
+Integers from @minus{}255 to @minus{}1
+
+@item J
+Integers from 0 to 7---valid bit number in a register
+
+@item K
+Integers from 0 to 127---valid displacement for addressing mode
+
+@item L
+Integers from 1 to 127
+
+@item M
+Integer @minus{}1
+
+@item N
+Integer 1
+
+@item O
+Zero
+
+@item P
+Integers from 0 to 255
+@end table
+
  @item MIPS---@file{mips.h}
  @table @code
  @item d
@@ -1853,6 +2039,17 @@ Zero
  32-bit constant with the low 12 bits clear (a constant that can be
  loaded with the @code{sethi} instruction)
  
+@item L
+A constant in the range supported by @code{movcc} instructions
+
+@item M
+A constant in the range supported by @code{movrcc} instructions
+
+@item N
+Same as @samp{K}, except that it verifies that bits that are not in the
+lower 32-bit range are all zero.  Must be used instead of @samp{K} for
+modes wider than @code{SImode}
+
  @item G
  Floating-point zero
  
@@ -1880,6 +2077,9 @@ Memory address aligned to an 8-byte boundary
  @item U
  Even register
  
+@item W
+Memory address for @samp{e} constraint registers.
+
  @end table
  
  @item TMS320C3x/C4x---@file{c4x.h}
@@ -1995,6 +2195,98 @@ Symbolic constant suitable for use with the @code{larl} instruction
  
  @end table
  
+@item Xstormy16---@file{stormy16.h}
+@table @code
+@item a
+Register r0.
+
+@item b
+Register r1.
+
+@item c
+Register r2.
+
+@item d
+Register r8.
+
+@item e
+Registers r0 through r7.
+
+@item t
+Registers r0 and r1.
+
+@item y
+The carry register.
+
+@item z
+Registers r8 and r9.
+
+@item I
+A constant between 0 and 3 inclusive.
+
+@item J
+A constant that has exactly one bit set.
+
+@item K
+A constant that has exactly one bit clear.
+
+@item L
+A constant between 0 and 255 inclusive.
+
+@item M
+A constant between @minus{}255 and 0 inclusive.
+
+@item N
+A constant between @minus{}3 and 0 inclusive.
+
+@item O
+A constant between 1 and 4 inclusive.
+
+@item P
+A constant between @minus{}4 and @minus{}1 inclusive.
+
+@item Q
+A memory reference that is a stack push.
+
+@item R
+A memory reference that is a stack pop.
+
+@item S
+A memory reference that refers to a constant address of known value.
+
+@item T
+The register indicated by Rx (not implemented yet).
+
+@item U
+A constant that is not between 2 and 15 inclusive.
+
+@end table
+
+@item Xtensa---@file{xtensa.h}
+@table @code
+@item a
+General-purpose 32-bit register
+
+@item b
+One-bit boolean register
+
+@item A
+MAC16 40-bit accumulator register
+
+@item I
+Signed 12-bit integer constant, for use in MOVI instructions
+
+@item J
+Signed 8-bit integer constant, for use in ADDI instructions
+
+@item K
+Integer constant valid for BccI instructions
+
+@item L
+Unsigned constant valid for BccUI instructions
+
+@end table
+
  @end table
  
  @ifset INTERNALS
@@ -2019,7 +2311,9 @@ This instruction pattern moves data with that machine mode from operand
  If operand 0 is a @code{subreg} with mode @var{m} of a register whose
  own mode is wider than @var{m}, the effect of this instruction is
  to store the specified value in the part of the register that corresponds
-to mode @var{m}.  The effect on the rest of the register is undefined.
+to mode @var{m}.  Bits outside of @var{m}, but which are within the
+same target word as the @code{subreg}, are undefined.  Bits which are
+outside the target word are left unchanged.
  
  This class of patterns is special in several ways.  First of all, each
  of these names up to and including full word size @emph{must} be defined,
@@ -2068,9 +2362,8 @@ machines explicit memory references will get optional reloads.
  If a scratch register is required to move an object to or from memory,
  it can be allocated using @code{gen_reg_rtx} prior to life analysis.
  
-If there are cases needing
-scratch registers after reload, you must define
-@code{SECONDARY_INPUT_RELOAD_CLASS} and perhaps also
+If there are cases which need scratch registers during or after reload,
+you must define @code{SECONDARY_INPUT_RELOAD_CLASS} and/or
  @code{SECONDARY_OUTPUT_RELOAD_CLASS} to detect them, and provide
  patterns @samp{reload_in@var{m}} or @samp{reload_out@var{m}} to handle
  them.  @xref{Register Classes}.
@@ -2109,7 +2402,7 @@ register.  See the discussion of the @code{SECONDARY_RELOAD_CLASS}
  macro in @pxref{Register Classes}.
  
  There are special restrictions on the form of the @code{match_operand}s
-used in these patterns.  First, only the predicate for the reload 
+used in these patterns.  First, only the predicate for the reload
  operand is examined, i.e., @code{reload_in} examines operand 1, but not
  the predicates for operand 0 or 2.  Second, there may be only one
  alternative in the constraints.  Third, only a single register class
@@ -2147,8 +2440,7 @@ Write the generated insn as a @code{parallel} with elements being a
  @code{set} of one register from the appropriate memory location (you may
  also need @code{use} or @code{clobber} elements).  Use a
  @code{match_parallel} (@pxref{RTL Template}) to recognize the insn.  See
-@file{a29k.md} and @file{rs6000.md} for examples of the use of this insn
-pattern.
+@file{rs6000.md} for examples of the use of this insn pattern.
  
  @cindex @samp{store_multiple} instruction pattern
  @item @samp{store_multiple}
@@ -2159,7 +2451,7 @@ operand 2 is a constant: the number of consecutive registers.
  
  @cindex @code{push@var{m}} instruction pattern
  @item @samp{push@var{m}}
-Output an push instruction.  Operand 0 is value to push.  Used only when
+Output a push instruction.  Operand 0 is value to push.  Used only when
  @code{PUSH_ROUNDING} is defined.  For historical reason, this pattern may be
  missing and in such case an @code{mov} expander is used instead, with a
  @code{MEM} expression forming the push operation.  The @code{mov} expander
@@ -2275,7 +2567,111 @@ Store the absolute value of operand 1 into operand 0.
  Store the square root of operand 1 into operand 0.
  
  The @code{sqrt} built-in function of C always uses the mode which
-corresponds to the C data type @code{double}.
+corresponds to the C data type @code{double} and the @code{sqrtf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{cos@var{m}2} instruction pattern
+@item @samp{cos@var{m}2}
+Store the cosine of operand 1 into operand 0.
+
+The @code{cos} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{cosf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{sin@var{m}2} instruction pattern
+@item @samp{sin@var{m}2}
+Store the sine of operand 1 into operand 0.
+
+The @code{sin} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{sinf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{exp@var{m}2} instruction pattern
+@item @samp{exp@var{m}2}
+Store the exponential of operand 1 into operand 0.
+
+The @code{exp} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{expf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{log@var{m}2} instruction pattern
+@item @samp{log@var{m}2}
+Store the natural logarithm of operand 1 into operand 0.
+
+The @code{log} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{logf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{pow@var{m}3} instruction pattern
+@item @samp{pow@var{m}3}
+Store the value of operand 1 raised to the exponent operand 2
+into operand 0.
+
+The @code{pow} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{powf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{atan2@var{m}3} instruction pattern
+@item @samp{atan2@var{m}3}
+Store the arc tangent (inverse tangent) of operand 1 divided by
+operand 2 into operand 0, using the signs of both arguments to
+determine the quadrant of the result.
+
+The @code{atan2} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{atan2f}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{floor@var{m}2} instruction pattern
+@item @samp{floor@var{m}2}
+Store the largest integral value not greater than operand 1 into operand 0.
+
+The @code{floor} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{floorf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{trunc@var{m}2} instruction pattern
+@item @samp{trunc@var{m}2}
+Store the argument rounded to integer towards zero.
+
+The @code{trunc} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{truncf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{round@var{m}2} instruction pattern
+@item @samp{round@var{m}2}
+Store the argument rounded to the nearest integer, halfway cases away from zero.
+
+The @code{round} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{roundf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{ceil@var{m}2} instruction pattern
+@item @samp{ceil@var{m}2}
+Store the smallest integral value not less than the argument.
+
+The @code{ceil} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{ceilf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{nearbyint@var{m}2} instruction pattern
+@item @samp{nearbyint@var{m}2}
+Store the argument rounded according to the default rounding mode.
+
+The @code{nearbyint} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{nearbyintf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
  
  @cindex @code{ffs@var{m}2} instruction pattern
  @item @samp{ffs@var{m}2}
@@ -2288,6 +2684,36 @@ generating the instruction.
  The @code{ffs} built-in function of C always uses the mode which
  corresponds to the C data type @code{int}.
  
+@cindex @code{clz@var{m}2} instruction pattern
+@item @samp{clz@var{m}2}
+Store into operand 0 the number of leading 0-bits in @var{x}, starting
+at the most significant bit position.  If @var{x} is 0, the result is
+undefined.  @var{m} is the mode of operand 0; operand 1's mode is
+specified by the instruction pattern, and the compiler will convert the
+operand to that mode before generating the instruction.
+
+@cindex @code{ctz@var{m}2} instruction pattern
+@item @samp{ctz@var{m}2}
+Store into operand 0 the number of trailing 0-bits in @var{x}, starting
+at the least significant bit position.  If @var{x} is 0, the result is
+undefined.  @var{m} is the mode of operand 0; operand 1's mode is
+specified by the instruction pattern, and the compiler will convert the
+operand to that mode before generating the instruction.
+
+@cindex @code{popcount@var{m}2} instruction pattern
+@item @samp{popcount@var{m}2}
+Store into operand 0 the number of 1-bits in @var{x}.  @var{m} is the
+mode of operand 0; operand 1's mode is specified by the instruction
+pattern, and the compiler will convert the operand to that mode before
+generating the instruction.
+
+@cindex @code{parity@var{m}2} instruction pattern
+@item @samp{parity@var{m}2}
+Store into operand 0 the parity of @var{x}, i.e.@: the number of 1-bits
+in @var{x} modulo 2.  @var{m} is the mode of operand 0; operand 1's mode
+is specified by the instruction pattern, and the compiler will convert
+the operand to that mode before generating the instruction.
+
  @cindex @code{one_cmpl@var{m}2} instruction pattern
  @item @samp{one_cmpl@var{m}2}
  Store the bitwise-complement of operand 1 into operand 0.
@@ -2479,6 +2905,13 @@ codes and vice versa.
  If the machine does not have conditional move instructions, do not
  define these patterns.
  
+@cindex @code{add@var{mode}cc} instruction pattern
+@item @samp{add@var{mode}cc}
+Similar to @samp{mov@var{mode}cc} but for conditional addition.  Conditionally
+move operand 2 or (operand 2 + operand 3) into operand 0 according to the
+comparison in operand 1.  If the comparison is true, operand 2 is moved into
+operand 0, otherwise (operand 2 + operand 3) is moved.
+
  @cindex @code{s@var{cond}} instruction pattern
  @item @samp{s@var{cond}}
  Store zero or nonzero in the operand according to the condition codes.
@@ -2572,11 +3005,11 @@ insn.
  @cindex @code{call_value_pop} instruction pattern
  @item @samp{call_pop}, @samp{call_value_pop}
  Similar to @samp{call} and @samp{call_value}, except used if defined and
-if @code{RETURN_POPS_ARGS} is non-zero.  They should emit a @code{parallel}
+if @code{RETURN_POPS_ARGS} is nonzero.  They should emit a @code{parallel}
  that contains both the function call and a @code{set} to indicate the
  adjustment made to the frame pointer.
  
-For machines where @code{RETURN_POPS_ARGS} can be non-zero, the use of these
+For machines where @code{RETURN_POPS_ARGS} can be nonzero, the use of these
  patterns increases the number of functions for which the frame pointer
  can be eliminated, if desired.
  
@@ -2611,7 +3044,7 @@ to save any registers or allocate stack space.
  @findex reload_completed
  @findex leaf_function_p
  For such machines, the condition specified in this pattern should only
-be true when @code{reload_completed} is non-zero and the function's
+be true when @code{reload_completed} is nonzero and the function's
  epilogue would only be a single instruction.  For machines with register
  windows, the routine @code{leaf_function_p} may be used to determine if
  a register window push is required.
@@ -2710,9 +3143,9 @@ that the jump optimizer will not delete the table as unreachable code.
  @cindex @code{decrement_and_branch_until_zero} instruction pattern
  @item @samp{decrement_and_branch_until_zero}
  Conditional branch instruction that decrements a register and
-jumps if the register is non-zero.  Operand 0 is the register to
+jumps if the register is nonzero.  Operand 0 is the register to
  decrement and test; operand 1 is the label to jump to if the
-register is non-zero.  @xref{Looping Patterns}.
+register is nonzero.  @xref{Looping Patterns}.
  
  This optional instruction pattern is only used by the combiner,
  typically for loops reversed by the loop optimizer when strength
@@ -2721,13 +3154,13 @@ reduction is enabled.
  @cindex @code{doloop_end} instruction pattern
  @item @samp{doloop_end}
  Conditional branch instruction that decrements a register and jumps if
-the register is non-zero.  This instruction takes five operands: Operand
+the register is nonzero.  This instruction takes five operands: Operand
  0 is the register to decrement and test; operand 1 is the number of loop
  iterations as a @code{const_int} or @code{const0_rtx} if this cannot be
  determined until run-time; operand 2 is the actual or estimated maximum
  number of iterations as a @code{const_int}; operand 3 is the number of
  enclosed loops as a @code{const_int} (an innermost loop has a value of
-1); operand 4 is the label to jump to if the register is non-zero.
+1); operand 4 is the label to jump to if the register is nonzero.
  @xref{Looping Patterns}.
  
  This optional instruction pattern should be defined for machines with
@@ -2741,8 +3174,8 @@ too large for this instruction, make it fail.
  @cindex @code{doloop_begin} instruction pattern
  @item @samp{doloop_begin}
  Companion instruction to @code{doloop_end} required for machines that
-need to perform some initialisation, such as loading special registers
-used by a low-overhead looping instruction.  If initialisation insns do
+need to perform some initialization, such as loading special registers
+used by a low-overhead looping instruction.  If initialization insns do
  not always need to be emitted, use a @code{define_expand}
  (@pxref{Expander Definitions}) and make it fail.
  
@@ -2947,8 +3380,8 @@ the function should return.  This will normally need to copied by the
  pattern to some special register or memory location.
  
  This pattern only needs to be defined if call frame exception handling
-is to be used, and simple moves to @code{EH_RETURN_STACKADJ_RTX} and
-@code{EH_RETURN_HANDLER_RTX} are not sufficient.
+is to be used, and simple moves involving @code{EH_RETURN_STACKADJ_RTX}
+and @code{EH_RETURN_HANDLER_RTX} are not sufficient.
  
  @cindex @code{prologue} instruction pattern
  @anchor{prologue instruction pattern}
@@ -2966,7 +3399,7 @@ instruction scheduling.
  @cindex @code{epilogue} instruction pattern
  @anchor{epilogue instruction pattern}
  @item @samp{epilogue}
-This pattern, if defined, emits RTL for exit from a function.  The function
+This pattern emits RTL for exit from a function.  The function
  exit is responsible for deallocating the stack frame, restoring callee saved
  registers and emitting the return instruction.
  
@@ -3008,13 +3441,21 @@ A typical @code{conditional_trap} pattern looks like
    "@dots{}")
  @end smallexample
  
-@cindex @code{cycle_display} instruction pattern
-@item @samp{cycle_display}
+@cindex @code{prefetch} instruction pattern
+@item @samp{prefetch}
+
+This pattern, if defined, emits code for a non-faulting data prefetch
+instruction.  Operand 0 is the address of the memory to prefetch.  Operand 1
+is a constant 1 if the prefetch is preparing for a write to the memory
+address, or a constant 0 otherwise.  Operand 2 is the expected degree of
+temporal locality of the data and is a value between 0 and 3, inclusive; 0
+means that the data has no temporal locality, so it need not be left in the
+cache after the access; 3 means that the data has a high degree of temporal
+locality and should be left in all levels of cache possible;  1 and 2 mean,
+respectively, a low or moderate degree of temporal locality.
  
-This pattern, if present, will be emitted by the instruction scheduler at
-the beginning of each new clock cycle.  This can be used for annotating the
-assembler output with cycle counts.  Operand 0 is a @code{const_int} that
-holds the clock cycle.
+Targets that do not support write prefetches or locality hints can ignore
+the values of operands 1 and 2.
  
  @end table
  
@@ -3186,7 +3627,7 @@ multiple condition registers, use a pseudo register.
  @findex next_cc0_user
  On some machines, the type of branch instruction generated may depend on
  the way the condition code was produced; for example, on the 68k and
-Sparc, setting the condition code directly from an add or subtract
+SPARC, setting the condition code directly from an add or subtract
  instruction does not clear the overflow bit the way that a test
  instruction does, so a different branch instruction must be used for
  some conditional branches.  For machines that use @code{(cc0)}, the set
@@ -3205,7 +3646,7 @@ different formats of the condition code register.
  Registers used to store the condition code value should have a mode that
  is in class @code{MODE_CC}.  Normally, it will be @code{CCmode}.  If
  additional modes are required (as for the add example mentioned above in
-the Sparc), define the macro @code{EXTRA_CC_MODES} to list the
+the SPARC), define the macro @code{EXTRA_CC_MODES} to list the
  additional modes required (@pxref{Condition Code}).  Also define
  @code{SELECT_CC_MODE} to choose a mode given an operand of a compare.
  
@@ -3217,7 +3658,7 @@ be specified at that time.
  If the cases that require different modes would be made by instruction
  combination, the macro @code{SELECT_CC_MODE} determines which machine
  mode should be used for the comparison result.  The patterns should be
-written using that mode.  To support the case of the add on the Sparc
+written using that mode.  To support the case of the add on the SPARC
  discussed above, we have the pattern
  
  @smallexample
@@ -3231,7 +3672,7 @@ discussed above, we have the pattern
    "@dots{}")
  @end smallexample
  
-The @code{SELECT_CC_MODE} macro on the Sparc returns @code{CC_NOOVmode}
+The @code{SELECT_CC_MODE} macro on the SPARC returns @code{CC_NOOVmode}
  for comparisons whose argument is a @code{plus}.
  
  @node Looping Patterns
@@ -3239,7 +3680,7 @@ for comparisons whose argument is a @code{plus}.
  @cindex looping instruction patterns
  @cindex defining looping instruction patterns
  
-Some machines have special jump instructions that can be utilised to
+Some machines have special jump instructions that can be utilized to
  make loops more efficient.  A common example is the 68000 @samp{dbra}
  instruction which performs a decrement of a register and a branch if the
  result was greater than zero.  Other machines, in particular digital
@@ -3251,9 +3692,9 @@ iterations.  This avoids the need for fetching and executing a
  @samp{dbra}-like instruction and avoids pipeline stalls associated with
  the jump.
  
-GCC has three special named patterns to support low overhead looping,
-@samp{decrement_and_branch_until_zero}, @samp{doloop_begin}, and
-@samp{doloop_end}.  The first pattern,
+GCC has three special named patterns to support low overhead looping.
+They are @samp{decrement_and_branch_until_zero}, @samp{doloop_begin},
+and @samp{doloop_end}.  The first pattern,
  @samp{decrement_and_branch_until_zero}, is not emitted during RTL
  generation but may be emitted during the instruction combination phase.
  This requires the assistance of the loop optimizer, using information
@@ -3308,13 +3749,13 @@ pattern will not be matched by the combiner.
  @end smallexample
  
  The other two special looping patterns, @samp{doloop_begin} and
-@samp{doloop_end}, are emitted by the loop optimiser for certain
+@samp{doloop_end}, are emitted by the loop optimizer for certain
  well-behaved loops with a finite number of loop iterations using
  information collected during strength reduction.
  
  The @samp{doloop_end} pattern describes the actual looping instruction
  (or the implicit looping operation) and the @samp{doloop_begin} pattern
-is an optional companion pattern that can be used for initialisation
+is an optional companion pattern that can be used for initialization
  needed for some low-overhead looping instructions.
  
  Note that some machines require the actual looping instruction to be
@@ -3370,6 +3811,14 @@ For these operators, if only one operand is a @code{neg}, @code{not},
  @code{mult}, @code{plus}, or @code{minus} expression, it will be the
  first operand.
  
+@item
+In combinations of @code{neg}, @code{mult}, @code{plus}, and
+@code{minus}, the @code{neg} operations (if any) will be moved inside
+the operations as far as possible.  For instance, 
+@code{(neg (mult A B))} is canonicalized as @code{(mult (neg A) B)}, but
+@code{(plus (mult (neg B) C) A)} is canonicalized as
+@code{(minus A (mult B C))}.
+
  @cindex @code{compare}, canonicalization of
  @item
  For the @code{compare} operator, a constant is always the second operand
@@ -3679,13 +4128,14 @@ in the compiler.
  @cindex instruction splitting
  @cindex splitting instructions
  
-There are two cases where you should specify how to split a pattern into
-multiple insns.  On machines that have instructions requiring delay
-slots (@pxref{Delay Slots}) or that have instructions whose output is
-not available for multiple cycles (@pxref{Function Units}), the compiler
-phases that optimize these cases need to be able to move insns into
-one-instruction delay slots.  However, some insns may generate more than one
-machine instruction.  These insns cannot be placed into a delay slot.
+There are two cases where you should specify how to split a pattern
+into multiple insns.  On machines that have instructions requiring
+delay slots (@pxref{Delay Slots}) or that have instructions whose
+output is not available for multiple cycles (@pxref{Processor pipeline
+description}), the compiler phases that optimize these cases need to
+be able to move insns into one-instruction delay slots.  However, some
+insns may generate more than one machine instruction.  These insns
+cannot be placed into a delay slot.
  
  Often you can rewrite the single insn as a list of individual insns,
  each corresponding to one machine instruction.  The disadvantage of
@@ -3737,10 +4187,10 @@ Patterns are matched against @var{insn-pattern} in two different
  circumstances.  If an insn needs to be split for delay slot scheduling
  or insn scheduling, the insn is already known to be valid, which means
  that it must have been matched by some @code{define_insn} and, if
-@code{reload_completed} is non-zero, is known to satisfy the constraints
+@code{reload_completed} is nonzero, is known to satisfy the constraints
  of that @code{define_insn}.  In that case, the new insn patterns must
  also be insns that are matched by some @code{define_insn} and, if
-@code{reload_completed} is non-zero, must also satisfy the constraints
+@code{reload_completed} is nonzero, must also satisfy the constraints
  of those definitions.
  
  As an example of this usage of @code{define_split}, consider the following
@@ -3840,10 +4290,10 @@ are not valid.
  The splitter is allowed to split jump instructions into sequence of
  jumps or create new jumps in while splitting non-jump instructions.  As
  the central flowgraph and branch prediction information needs to be updated,
-several restriction apply. 
+several restrictions apply.
  
  Splitting of jump instruction into sequence that over by another jump
-instruction is always valid, as compiler expect identical behaviour of new
+instruction is always valid, as compiler expect identical behavior of new
  jump.  When new sequence contains multiple jump instructions or new labels,
  more assistance is needed.  Splitter is required to create only unconditional
  jumps, or simple conditional jump instructions.  Additionally it must attach a
@@ -3853,6 +4303,7 @@ it was an simple conditional jump, @minus{}1 otherwise.  To simplify
  recomputing of edge frequencies, new sequence is required to have only
  forward jumps to the newly created labels.
  
+@findex define_insn_and_split
  For the common case where the pattern of a define_split exactly matches the
  pattern of a define_insn, use @code{define_insn_and_split}.  It looks like
  this:
@@ -3888,7 +4339,8 @@ from i386.md:
    "TARGET_ZERO_EXTEND_WITH_AND && !optimize_size"
    "#"
    "&& reload_completed"
-  [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 65535)))
+  [(parallel [(set (match_dup 0)
+                   (and:SI (match_dup 0) (const_int 65535)))
               (clobber (reg:CC 17))])]
    ""
    [(set_attr "type" "alu1")])
@@ -3903,6 +4355,80 @@ functionality as two separate @code{define_insn} and @code{define_split}
  patterns.  It exists for compactness, and as a maintenance tool to prevent
  having to ensure the two patterns' templates match.
  
+@node Including Patterns
+@section Including Patterns in Machine Descriptions
+@cindex insn includes
+
+@findex include
+The @code{include} pattern tells the compiler tools where to
+look for patterns that are in files other than in the file
+@file{.md}. This is used only at build time and there is no preprocessing allowed.
+
+It looks like:
+
+@smallexample
+
+(include
+  @var{pathname})
+@end smallexample
+
+For example:
+
+@smallexample
+
+(include "filestuff")
+
+@end smallexample
+
+Here @var{pathname} is a string that specifies the location of the file.
+The example above specifies the include file to be in
+@file{gcc/config/@var{target}/filestuff}.  The directory @file{gcc/config/@var{target}} is regarded as the default directory.
+
+
+Machine descriptions may be split up into smaller more manageable subsections
+and placed into subdirectories.
+
+By specifying:
+
+@smallexample
+
+(include "BOGUS/filestuff")
+
+@end smallexample
+
+the include file is specified to be in @file{gcc/config/@var{target}/BOGUS/filestuff}.
+
+Specifying an absolute path for the include file such as:
+@smallexample
+
+(include "/u2/BOGUS/filestuff")
+
+@end smallexample
+is permitted but is not encouraged.
+
+@subsection RTL Generation Tool Options for Directory Search
+@cindex directory options .md
+@cindex options, directory search
+@cindex search options
+
+The @option{-I@var{dir}} option specifies directories to search for machine descriptions.
+For example:
+
+@smallexample
+
+genrecog -I/p1/abc/proc1 -I/p2/abcd/pro2 target.md
+
+@end smallexample
+
+
+This adds the directory @var{dir} to the head of the list of directories to be
+searched for machine description files.  This can be used to override a system machine definition
+file, substituting your own version, since these directories are
+searched before the default machine description file directories.  If you use more than
+one @option{-I} option, the directories are scanned in left-to-right
+order; the standard default directory comes after.
+
+
  @node Peephole Definitions
  @section Machine-Specific Peephole Optimizers
  @cindex peephole optimizer definitions
@@ -4229,7 +4755,7 @@ to track the condition codes.
  * Insn Lengths::        Computing the length of insns.
  * Constant Attributes:: Defining attributes that are constant.
  * Delay Slots::         Defining delay slots required for a machine.
-* Function Units::      Specifying information for insn scheduling.
+* Processor pipeline description:: Specifying information for insn scheduling.
  @end menu
  
  @node Defining Attributes
@@ -4350,7 +4876,7 @@ expression is that of the @var{default} expression.
  @table @code
  @cindex @code{const_int} and attribute tests
  @item (const_int @var{i})
-This test is true if @var{i} is non-zero and false otherwise.
+This test is true if @var{i} is nonzero and false otherwise.
  
  @cindex @code{not} and attributes
  @cindex @code{ior} and attributes
@@ -4365,7 +4891,7 @@ These tests are true if the indicated logical function is true.
  This test is true if operand @var{n} of the insn whose attribute value
  is being determined has mode @var{m} (this part of the test is ignored
  if @var{m} is @code{VOIDmode}) and the function specified by the string
-@var{pred} returns a non-zero value when passed operand @var{n} and mode
+@var{pred} returns a nonzero value when passed operand @var{n} and mode
  @var{m} (this part of the test is ignored if @var{pred} is the null
  string).
  
@@ -4749,9 +5275,10 @@ as follows:
     return (get_attr_length (insn) == 4
             ? "b %l0" : "l r15,=a(%l0); br r15");
  @}
-  [(set (attr "length") (if_then_else (lt (match_dup 0) (const_int 4096))
-                                      (const_int 4)
-                                      (const_int 6)))])
+  [(set (attr "length")
+        (if_then_else (lt (match_dup 0) (const_int 4096))
+                      (const_int 4)
+                      (const_int 6)))])
  @end smallexample
  
  @node Constant Attributes
@@ -4858,14 +5385,101 @@ branch is true, we might represent this as follows:
  @end smallexample
  @c the above is *still* too long.  --mew 4feb93
  
-@node Function Units
-@subsection Specifying Function Units
+@node Processor pipeline description
+@subsection Specifying processor pipeline description
+@cindex processor pipeline description
+@cindex processor functional units
+@cindex instruction latency time
+@cindex interlock delays
+@cindex data dependence delays
+@cindex reservation delays
+@cindex pipeline hazard recognizer
+@cindex automaton based pipeline description
+@cindex regular expressions
+@cindex deterministic finite state automaton
+@cindex automaton based scheduler
+@cindex RISC
+@cindex VLIW
+
+To achieve better performance, most modern processors
+(super-pipelined, superscalar @acronym{RISC}, and @acronym{VLIW}
+processors) have many @dfn{functional units} on which several
+instructions can be executed simultaneously.  An instruction starts
+execution if its issue conditions are satisfied.  If not, the
+instruction is stalled until its conditions are satisfied.  Such
+@dfn{interlock (pipeline) delay} causes interruption of the fetching
+of successor instructions (or demands nop instructions, e.g. for some
+MIPS processors).
+
+There are two major kinds of interlock delays in modern processors.
+The first one is a data dependence delay determining @dfn{instruction
+latency time}.  The instruction execution is not started until all
+source data have been evaluated by prior instructions (there are more
+complex cases when the instruction execution starts even when the data
+are not available but will be ready in given time after the
+instruction execution start).  Taking the data dependence delays into
+account is simple.  The data dependence (true, output, and
+anti-dependence) delay between two instructions is given by a
+constant.  In most cases this approach is adequate.  The second kind
+of interlock delays is a reservation delay.  The reservation delay
+means that two instructions under execution will be in need of shared
+processors resources, i.e. buses, internal registers, and/or
+functional units, which are reserved for some time.  Taking this kind
+of delay into account is complex especially for modern @acronym{RISC}
+processors.
+
+The task of exploiting more processor parallelism is solved by an
+instruction scheduler.  For a better solution to this problem, the
+instruction scheduler has to have an adequate description of the
+processor parallelism (or @dfn{pipeline description}).  Currently GCC 
+provides two alternative ways to describe processor parallelism,
+both described below.  The first method is outlined in the next section;
+it was once the only method provided by GCC, and thus is used in a number
+of existing ports.  The second, and preferred method, specifies functional
+unit reservations for groups of instructions with the aid of @dfn{regular
+expressions}.  This is called the @dfn{automaton based description}.   
+
+The GCC instruction scheduler uses a @dfn{pipeline hazard recognizer} to
+figure out the possibility of the instruction issue by the processor
+on a given simulated processor cycle.  The pipeline hazard recognizer is
+automatically generated from the processor pipeline description.  The
+pipeline hazard recognizer generated from the automaton based
+description is more sophisticated and based on a deterministic finite
+state automaton (@acronym{DFA}) and therefore faster than one
+generated from the old description.  Furthermore, its speed is not dependent
+on processor complexity.  The instruction issue is possible if there is
+a transition from one automaton state to another one.
+
+You can use either model to describe processor pipeline characteristics
+or even a mix of them.  You could use the old description for some
+processor submodels and the @acronym{DFA}-based one for the rest
+of the processor submodels.
+
+In general, the use of the automaton based description is
+preferable.  Its model is richer.  It permits a more accurate
+description of the pipeline characteristics of processors, which results
+in improved code quality (although sometimes only by fractions of a
+percent).  It will also be used as an infrastructure to implement
+sophisticated and practical insn scheduling which will try many
+instruction sequences to choose the best one.
+
+
+@menu
+* Old pipeline description:: Specifying information for insn scheduling.
+* Automaton pipeline description:: Describing insn pipeline characteristics.
+* Comparison of the two descriptions:: Drawbacks of the old pipeline description
+@end menu
+
+@node Old pipeline description
+@subsubsection Specifying Function Units
+@cindex old pipeline description
  @cindex function units, for scheduling
  
-On most RISC machines, there are instructions whose results are not
-available for a specific number of cycles.  Common cases are instructions
-that load data from memory.  On many machines, a pipeline stall will result
-if the data is referenced too soon after the load instruction.
+On most @acronym{RISC} machines, there are instructions whose results
+are not available for a specific number of cycles.  Common cases are
+instructions that load data from memory.  On many machines, a pipeline
+stall will result if the data is referenced too soon after the load
+instruction.
  
  In addition, many newer microprocessors have multiple function units, usually
  one for integer and one for floating point, and often will incur pipeline
@@ -4879,13 +5493,14 @@ due to function unit conflicts.
  
  For the purposes of the specifications in this section, a machine is
  divided into @dfn{function units}, each of which execute a specific
-class of instructions in first-in-first-out order.  Function units that
-accept one instruction each cycle and allow a result to be used in the
-succeeding instruction (usually via forwarding) need not be specified.
-Classic RISC microprocessors will normally have a single function unit,
-which we can call @samp{memory}.  The newer ``superscalar'' processors
-will often have function units for floating point operations, usually at
-least a floating point adder and multiplier.
+class of instructions in first-in-first-out order.  Function units
+that accept one instruction each cycle and allow a result to be used
+in the succeeding instruction (usually via forwarding) need not be
+specified.  Classic @acronym{RISC} microprocessors will normally have
+a single function unit, which we can call @samp{memory}.  The newer
+``superscalar'' processors will often have function units for floating
+point operations, usually at least a floating point adder and
+multiplier.
  
  @findex define_function_unit
  Each usage of a function units by a class of insns is specified with a
@@ -4948,10 +5563,10 @@ Typical uses of this vector are where a floating point function unit can
  pipeline either single- or double-precision operations, but not both, or
  where a memory unit can pipeline loads, but not stores, etc.
  
-As an example, consider a classic RISC machine where the result of a
-load instruction is not available for two cycles (a single ``delay''
-instruction is required) and where only one load instruction can be executed
-simultaneously.  This would be specified as:
+As an example, consider a classic @acronym{RISC} machine where the
+result of a load instruction is not available for two cycles (a single
+``delay'' instruction is required) and where only one load instruction
+can be executed simultaneously.  This would be specified as:
  
  @smallexample
  (define_function_unit "memory" 1 1 (eq_attr "type" "load") 2 0)
@@ -4976,7 +5591,473 @@ units.  These insns will cause a potential conflict for the second unit
  used during their execution and there is no way of representing that
  conflict.  We welcome any examples of how function unit conflicts work
  in such processors and suggestions for their representation.
-@end ifset
+
+@node Automaton pipeline description
+@subsubsection Describing instruction pipeline characteristics
+@cindex automaton based pipeline description
+
+This section describes constructions of the automaton based processor
+pipeline description.  The order of the constructions mentioned below
+within the machine description file is not important.
+
+@findex define_automaton
+@cindex pipeline hazard recognizer
+The following optional construction describes names of automata
+generated and used for the pipeline hazards recognition.  Sometimes
+the generated finite state automaton used by the pipeline hazard
+recognizer is large.  If we use more than one automaton and bind functional
+units to the automata, the total size of the automata is usually
+less than the size of the single automaton.  If there is no such
+construction, only one finite state automaton is generated.
+
+@smallexample
+(define_automaton @var{automata-names})
+@end smallexample
+
+@var{automata-names} is a string giving names of the automata.  The
+names are separated by commas.  All the automata should have unique names.
+The automaton name is used in construction @code{define_cpu_unit} and
+@code{define_query_cpu_unit}.
+
+@findex define_cpu_unit
+@cindex processor functional units
+Each processor functional unit used in description of instruction
+reservations should be described by the following construction.
+
+@smallexample
+(define_cpu_unit @var{unit-names} [@var{automaton-name}])
+@end smallexample
+
+@var{unit-names} is a string giving the names of the functional units
+separated by commas.  Do not use the name @samp{nothing}; it is reserved
+for other purposes.
+
+@var{automaton-name} is a string giving the name of the automaton with
+which the unit is bound.  The automaton should be described in
+construction @code{define_automaton}.  You should give
+@var{automaton-name}, if there is a defined automaton.
+
+The assignment of units to automata is constrained by the uses of the
+units in insn reservations.  The most important constraint is: if a
+unit reservation is present on a particular cycle of an alternative
+for an insn reservation, then some unit from the same automaton must
+be present on the same cycle for the other alternatives of the insn
+reservation.  The rest of the constraints are mentioned in the
+description of the subsequent constructions.
+
+@findex define_query_cpu_unit
+@cindex querying function unit reservations
+The following construction describes CPU functional units analogously
+to @code{define_cpu_unit}.  The reservation of such units can be
+queried for an automaton state.  The instruction scheduler never
+queries reservation of functional units for given automaton state.  So
+as a rule, you don't need this construction.  This construction could
+be used for future code generation goals (e.g. to generate
+@acronym{VLIW} insn templates).
+
+@smallexample
+(define_query_cpu_unit @var{unit-names} [@var{automaton-name}])
+@end smallexample
+
+@var{unit-names} is a string giving names of the functional units
+separated by commas.
+
+@var{automaton-name} is a string giving the name of the automaton with
+which the unit is bound.
+
+@findex define_insn_reservation
+@cindex instruction latency time
+@cindex regular expressions
+@cindex data bypass
+The following construction is the major one to describe pipeline
+characteristics of an instruction.
+
+@smallexample
+(define_insn_reservation @var{insn-name} @var{default_latency}
+                         @var{condition} @var{regexp})
+@end smallexample
+
+@var{default_latency} is a number giving latency time of the
+instruction.  There is an important difference between the old
+description and the automaton based pipeline description.  The latency
+time is used for all dependencies when we use the old description.  In
+the automaton based pipeline description, the given latency time is only
+used for true dependencies.  The cost of anti-dependencies is always
+zero and the cost of output dependencies is the difference between
+latency times of the producing and consuming insns (if the difference
+is negative, the cost is considered to be zero).  You can always
+change the default costs for any description by using the target hook
+@code{TARGET_SCHED_ADJUST_COST} (@pxref{Scheduling}).
+
+@var{insn-name} is a string giving the internal name of the insn.  The
+internal names are used in constructions @code{define_bypass} and in
+the automaton description file generated for debugging.  The internal
+name has nothing in common with the names in @code{define_insn}.  It is a
+good practice to use insn classes described in the processor manual.
+
+@var{condition} defines what RTL insns are described by this
+construction.  You should remember that you will be in trouble if
+@var{condition} for two or more different
+@code{define_insn_reservation} constructions is TRUE for an insn.  In
+this case what reservation will be used for the insn is not defined.
+Such cases are not checked during generation of the pipeline hazards
+recognizer because in general recognizing that two conditions may have
+the same value is quite difficult (especially if the conditions
+contain @code{symbol_ref}).  It is also not checked during the
+pipeline hazard recognizer work because it would slow down the
+recognizer considerably.
+
+@var{regexp} is a string describing the reservation of the cpu's functional
+units by the instruction.  The reservations are described by a regular
+expression according to the following syntax:
+
+@smallexample
+       regexp = regexp "," oneof
+              | oneof
+
+       oneof = oneof "|" allof
+             | allof
+
+       allof = allof "+" repeat
+             | repeat
+ 
+       repeat = element "*" number
+              | element
+
+       element = cpu_function_unit_name
+               | reservation_name
+               | result_name
+               | "nothing"
+               | "(" regexp ")"
+@end smallexample
+
+@itemize @bullet
+@item
+@samp{,} is used for describing the start of the next cycle in
+the reservation.
+
+@item
+@samp{|} is used for describing a reservation described by the first
+regular expression @strong{or} a reservation described by the second
+regular expression @strong{or} etc.
+
+@item
+@samp{+} is used for describing a reservation described by the first
+regular expression @strong{and} a reservation described by the
+second regular expression @strong{and} etc.
+
+@item
+@samp{*} is used for convenience and simply means a sequence in which
+the regular expression is repeated @var{number} times with cycle
+advancing (see @samp{,}).
+
+@item
+@samp{cpu_function_unit_name} denotes reservation of the named
+functional unit.
+
+@item
+@samp{reservation_name} --- see description of construction
+@samp{define_reservation}.
+
+@item
+@samp{nothing} denotes no unit reservations.
+@end itemize
+
+@findex define_reservation
+Sometimes unit reservations for different insns contain common parts.
+In such case, you can simplify the pipeline description by describing
+the common part by the following construction
+
+@smallexample
+(define_reservation @var{reservation-name} @var{regexp})
+@end smallexample
+
+@var{reservation-name} is a string giving name of @var{regexp}.
+Functional unit names and reservation names are in the same name
+space.  So the reservation names should be different from the
+functional unit names and can not be the reserved name @samp{nothing}.
+
+@findex define_bypass
+@cindex instruction latency time
+@cindex data bypass
+The following construction is used to describe exceptions in the
+latency time for a given instruction pair.  These are so-called bypasses.
+
+@smallexample
+(define_bypass @var{number} @var{out_insn_names} @var{in_insn_names}
+               [@var{guard}])
+@end smallexample
+
+@var{number} defines when the result generated by the instructions
+given in string @var{out_insn_names} will be ready for the
+instructions given in string @var{in_insn_names}.  The instructions in
+the string are separated by commas.
+
+@var{guard} is an optional string giving the name of a C function which
+defines an additional guard for the bypass.  The function will get the
+two insns as parameters.  If the function returns zero the bypass will
+be ignored for this case.  The additional guard is necessary to
+recognize complicated bypasses, e.g. when the consumer is only an address
+of insn @samp{store} (not a stored value).
+
+@findex exclusion_set
+@findex presence_set
+@findex final_presence_set
+@findex absence_set
+@findex final_absence_set
+@cindex VLIW
+@cindex RISC
+Usually the following five constructions are used to describe
+@acronym{VLIW} processors (or, more precisely, to describe the placement
+of small insns into @acronym{VLIW} insn slots), although they can be
+used for @acronym{RISC} processors too.
+
+@smallexample
+(exclusion_set @var{unit-names} @var{unit-names})
+(presence_set @var{unit-names} @var{patterns})
+(final_presence_set @var{unit-names} @var{patterns})
+(absence_set @var{unit-names} @var{patterns})
+(final_absence_set @var{unit-names} @var{patterns})
+@end smallexample
+
+@var{unit-names} is a string giving names of functional units
+separated by commas.
+
+@var{patterns} is a string giving patterns of functional units
+separated by commas.  Currently a pattern is one unit or several units
+separated by white space.
+
+The first construction (@samp{exclusion_set}) means that each
+functional unit in the first string can not be reserved simultaneously
+with a unit whose name is in the second string and vice versa.  For
+example, the construction is useful for describing processors
+(e.g. some SPARC processors) with a fully pipelined floating point
+functional unit which can execute simultaneously only single floating
+point insns or only double floating point insns.
+
+The second construction (@samp{presence_set}) means that each
+functional unit in the first string can not be reserved unless at
+least one of the patterns of units whose names are in the second string is
+reserved.  This is an asymmetric relation.  For example, it is useful
+for description that @acronym{VLIW} @samp{slot1} is reserved after
+@samp{slot0} reservation.  We could describe it by the following
+construction
+
+@smallexample
+(presence_set "slot1" "slot0")
+@end smallexample
+
+Or @samp{slot1} is reserved only after @samp{slot0} and unit @samp{b0}
+reservation.  In this case we could write
+
+@smallexample
+(presence_set "slot1" "slot0 b0")
+@end smallexample
+
+The third construction (@samp{final_presence_set}) is analogous to
+@samp{presence_set}.  The difference between them is when checking is
+done.  When an instruction is issued in given automaton state
+reflecting all current and planned unit reservations, the automaton
+state is changed.  The first state is a source state, the second one
+is a result state.  Checking for @samp{presence_set} is done on the
+source state reservation, checking for @samp{final_presence_set} is
+done on the result reservation.  This construction is useful to
+describe a reservation which is actually two subsequent reservations.
+For example, if we use
+
+@smallexample
+(presence_set "slot1" "slot0")
+@end smallexample
+
+the following insn will never be issued (because @samp{slot1} requires
+@samp{slot0} which is absent in the source state).
+
+@smallexample
+(define_reservation "insn_and_nop" "slot0 + slot1")
+@end smallexample
+
+but it can be issued if we use analogous @samp{final_presence_set}.
+
+The fourth construction (@samp{absence_set}) means that each functional
+unit in the first string can be reserved only if each pattern of units
+whose names are in the second string is not reserved.  This is an
+asymmetric relation (actually @samp{exclusion_set} is analogous to
+this one but it is symmetric).  For example, it is useful for
+description that @acronym{VLIW} @samp{slot2} can not be reserved after
+@samp{slot0} or @samp{slot1} reservation.  We could describe it by the
+following construction
+
+@smallexample
+(absence_set "slot0" "slot1, slot2")
+@end smallexample
+
+Or @samp{slot2} can not be reserved if @samp{slot0} and unit @samp{b0}
+are reserved or @samp{slot1} and unit @samp{b1} are reserved.  In
+this case we could write
+
+@smallexample
+(absence_set "slot2" "slot0 b0, slot1 b1")
+@end smallexample
+
+All functional units mentioned in a set should belong to the same
+automaton.
+
+The last construction (@samp{final_absence_set}) is analogous to
+@samp{absence_set} but checking is done on the result (state)
+reservation.  See comments for @samp{final_presence_set}.
+
+@findex automata_option
+@cindex deterministic finite state automaton
+@cindex nondeterministic finite state automaton
+@cindex finite state automaton minimization
+You can control the generator of the pipeline hazard recognizer with
+the following construction.
+
+@smallexample
+(automata_option @var{options})
+@end smallexample
+
+@var{options} is a string giving options which affect the generated
+code.  Currently there are the following options:
+
+@itemize @bullet
+@item
+@dfn{no-minimization} makes no minimization of the automaton.  This is
+only worth doing when we are debugging the description and need to
+look more accurately at reservations of states.
+
+@item
+@dfn{time} means printing additional time statistics about
+generation of automata.
+
+@item
+@dfn{v} means a generation of the file describing the result automata.
+The file has suffix @samp{.dfa} and can be used for the description
+verification and debugging.
+
+@item
+@dfn{w} means generating warnings instead of errors for
+non-critical errors.
+
+@item
+@dfn{ndfa} makes nondeterministic finite state automata.  This affects
+the treatment of operator @samp{|} in the regular expressions.  The
+usual treatment of the operator is to try the first alternative and,
+if the reservation is not possible, the second alternative.  The
+nondeterministic treatment means trying all alternatives, some of which
+may be rejected by reservations in the subsequent insns.  You can not
+query functional unit reservations in nondeterministic automaton
+states.
+@end itemize
+
+As an example, consider a superscalar @acronym{RISC} machine which can
+issue three insns (two integer insns and one floating point insn) in
+one cycle but can finish only two insns.  To describe this, we define
+the following functional units.
+
+@smallexample
+(define_cpu_unit "i0_pipeline, i1_pipeline, f_pipeline")
+(define_cpu_unit "port0, port1")
+@end smallexample
+
+All simple integer insns can be executed in any integer pipeline and
+their result is ready in two cycles.  The simple integer insns are
+issued into the first pipeline unless it is reserved, otherwise they
+are issued into the second pipeline.  Integer division and
+multiplication insns can be executed only in the second integer
+pipeline and their results are ready correspondingly in 8 and 4
+cycles.  The integer division is not pipelined, i.e. the subsequent
+integer division insn can not be issued until the current division
+insn has finished.  Floating point insns are fully pipelined and their
+results are ready in 3 cycles.  Where the result of a floating point
+insn is used by an integer insn, an additional delay of one cycle is
+incurred.  To describe all of this we could specify
+
+@smallexample
+(define_cpu_unit "div")
+
+(define_insn_reservation "simple" 2 (eq_attr "cpu" "int")
+                         "(i0_pipeline | i1_pipeline), (port0 | port1)")
+
+(define_insn_reservation "mult" 4 (eq_attr "cpu" "mult")
+                         "i1_pipeline, nothing*2, (port0 | port1)")
+
+(define_insn_reservation "div" 8 (eq_attr "cpu" "div")
+                         "i1_pipeline, div*7, div + (port0 | port1)")
+
+(define_insn_reservation "float" 3 (eq_attr "cpu" "float")
+                         "f_pipeline, nothing, (port0 | port1)")
+
+(define_bypass 4 "float" "simple,mult,div")
+@end smallexample
+
+To simplify the description we could describe the following reservation
+
+@smallexample
+(define_reservation "finish" "port0|port1")
+@end smallexample
+
+and use it in all @code{define_insn_reservation} as in the following
+construction
+
+@smallexample
+(define_insn_reservation "simple" 2 (eq_attr "cpu" "int")
+                         "(i0_pipeline | i1_pipeline), finish")
+@end smallexample
+
+
+@node Comparison of the two descriptions
+@subsubsection Drawbacks of the old pipeline description
+@cindex old pipeline description
+@cindex automaton based pipeline description
+@cindex processor functional units
+@cindex interlock delays
+@cindex instruction latency time
+@cindex pipeline hazard recognizer
+@cindex data bypass
+
+The old instruction level parallelism description and the pipeline
+hazards recognizer based on it have the following drawbacks in
+comparison with the @acronym{DFA}-based ones:
+  
+@itemize @bullet
+@item
+Each functional unit is believed to be reserved at the instruction
+execution start.  This is a very inaccurate model for modern
+processors.
+
+@item
+An inadequate description of instruction latency times.  The latency
+time is bound with a functional unit reserved by an instruction not
+with the instruction itself.  In other words, the description is
+oriented to describe at most one unit reservation by each instruction.
+It also does not permit describing special bypasses between
+instruction pairs.
+
+@item
+The implementation of the pipeline hazard recognizer interface has
+a constraint on the number of functional units: it is limited to the
+number of bits in an integer on the host machine.
+
+@item
+The interface to the pipeline hazard recognizer is more complex than
+the one to the automaton based pipeline recognizer.
+
+@item
+An unnatural description when you write a unit and a condition which
+selects instructions using the unit.  Writing all unit reservations
+for an instruction (an instruction class) is more natural.
+
+@item
+The recognition of the interlock delays has a slow implementation.  The GCC
+scheduler supports structures which describe the unit reservations.
+The more functional units a processor has, the slower its pipeline hazard
+recognizer will be.  Such an implementation would become even slower if we
+allowed functional units to be reserved not only at the instruction
+execution start.
+In an automaton based pipeline hazard recognizer, speed is not dependent
+on processor complexity.
+@end itemize
  
  @node Conditional Execution
  @section Conditional Execution
@@ -5109,3 +6190,4 @@ You could write:
  
  The constants that are defined with a define_constant are also output
  in the insn-codes.h header file as #defines.
+@end ifset