2003-03-15 Aldy Hernandez <aldyh@redhat.com>

[pf3gnuchains/gcc-fork.git] / gcc / doc / md.texi
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi

index d8e68fa..da7e38b 100644 (file)
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -1,5 +1,5 @@
-@c Copyright (C) 1988, 1989, 1992, 1993, 1994, 1996, 1998, 1999, 2000, 2001, 2002
-@c Free Software Foundation, Inc.
+@c Copyright (C) 1988, 1989, 1992, 1993, 1994, 1996, 1998, 1999, 2000, 2001,
+@c 2002, 2003 Free Software Foundation, Inc.
  @c This is part of the GCC manual.
  @c For copying conditions, see the file gcc.texi.
  
@@ -845,8 +845,8 @@ that of the host machine (on which the compiler is running).
  
  @cindex @samp{F} in constraint
  @item @samp{F}
-An immediate floating operand (expression code @code{const_double}) is
-allowed.
+An immediate floating operand (expression code @code{const_double} or
+@code{const_vector}) is allowed.
  
  @cindex @samp{G} in constraint
  @cindex @samp{H} in constraint
@@ -898,7 +898,7 @@ digit is used together with letters within the same alternative, the
  digit should come last.
  
  This number is allowed to be more than a single digit.  If multiple
-digits are encountered consecutavely, they are interpreted as a single
+digits are encountered consecutively, they are interpreted as a single
  decimal integer.  There is scant chance for ambiguity, since to-date
  it has never been desirable that @samp{10} be interpreted as matching
  either operand 1 @emph{or} operand 0.  Should this be desired, one
@@ -965,7 +965,7 @@ The machine description macro @code{REG_CLASS_FROM_LETTER} has first
  cut at the otherwise unused letters.  If it evaluates to @code{NO_REGS},
  then @code{EXTRA_CONSTRAINT} is evaluated.
  
-A typical use for @code{EXTRA_CONSTRANT} would be to distinguish certain
+A typical use for @code{EXTRA_CONSTRAINT} would be to distinguish certain
  types of memory references that affect other insn operands.
  @end ifset
  @end table
@@ -1256,6 +1256,8 @@ instruction is defined:
    @dots{})
  @end smallexample
  @end ifset
+GCC can only handle one commutative pair in an asm; if you use more, 
+the compiler may fail.
  
  @cindex @samp{#} in constraint
  @item #
@@ -1374,60 +1376,6 @@ An item in the constant pool
  A symbol in the text segment of the current file
  @end table
  
-@item AMD 29000 family---@file{a29k.h}
-@table @code
-@item l
-Local register 0
-
-@item b
-Byte Pointer (@samp{BP}) register
-
-@item q
-@samp{Q} register
-
-@item h
-Special purpose register
-
-@item A
-First accumulator register
-
-@item a
-Other accumulator register
-
-@item f
-Floating point register
-
-@item I
-Constant greater than 0, less than 0x100
-
-@item J
-Constant greater than 0, less than 0x10000
-
-@item K
-Constant whose high 24 bits are on (1)
-
-@item L
-16-bit constant whose high 8 bits are on (1)
-
-@item M
-32-bit constant whose high 16 bits are on (1)
-
-@item N
-32-bit negative constant that fits in 8 bits
-
-@item O
-The constant 0x80000000 or, on the 29050, any 32-bit constant
-whose low 16 bits are 0.
-
-@item P
-16-bit negative constant that fits in 8 bits
-
-@item G
-@itemx H
-A floating point constant (in @code{asm} statements, use the machine
-independent @samp{E} or @samp{F} instead)
-@end table
-
  @item AVR family---@file{avr.h}
  @table @code
  @item l
@@ -1606,6 +1554,10 @@ Second floating point register
  @item c
  @samp{c} register
  
+@item C
+Specifies a constant that can be easily constructed in an SSE register
+without loading it from memory.
+
  @item d
  @samp{d} register
  
@@ -1744,68 +1696,97 @@ Integer constant in the range 1 to 4 for @code{shladd} instruction
  Memory operand except postincrement and postdecrement
  @end table
  
-@item MIPS---@file{mips.h}
+@item FRV---@file{frv.h}
  @table @code
+@item a
+Register in the class @code{ACC_REGS} (@code{acc0} to @code{acc7}).
+
+@item b
+Register in the class @code{EVEN_ACC_REGS} (@code{acc0} to @code{acc7}).
+
+@item c
+Register in the class @code{CC_REGS} (@code{fcc0} to @code{fcc3} and
+@code{icc0} to @code{icc3}).
+
  @item d
-General-purpose integer register
+Register in the class @code{GPR_REGS} (@code{gr0} to @code{gr63}).
+
+@item e
+Register in the class @code{EVEN_REGS} (@code{gr0} to @code{gr63}).
+Odd registers are excluded not in the class but through the use of a machine
+mode larger than 4 bytes.
  
  @item f
-Floating-point register (if available)
+Register in the class @code{FPR_REGS} (@code{fr0} to @code{fr63}).
  
  @item h
-@samp{Hi} register
+Register in the class @code{FEVEN_REGS} (@code{fr0} to @code{fr63}).
+Odd registers are excluded not in the class but through the use of a machine
+mode larger than 4 bytes.
  
  @item l
-@samp{Lo} register
+Register in the class @code{LR_REG} (the @code{lr} register).
  
-@item x
-@samp{Hi} or @samp{Lo} register
+@item q
+Register in the class @code{QUAD_REGS} (@code{gr2} to @code{gr63}).
+Register numbers not divisible by 4 are excluded not in the class but through
+the use of a machine mode larger than 8 bytes.
  
-@item y
-General-purpose integer register
+@item t
+Register in the class @code{ICC_REGS} (@code{icc0} to @code{icc3}).
+
+@item u
+Register in the class @code{FCC_REGS} (@code{fcc0} to @code{fcc3}).
+
+@item v
+Register in the class @code{ICR_REGS} (@code{cc4} to @code{cc7}).
+
+@item w
+Register in the class @code{FCR_REGS} (@code{cc0} to @code{cc3}).
+
+@item x
+Register in the class @code{QUAD_FPR_REGS} (@code{fr0} to @code{fr63}).
+Register numbers not divisible by 4 are excluded not in the class but through
+the use of a machine mode larger than 8 bytes.
  
  @item z
-Floating-point status register
+Register in the class @code{SPR_REGS} (@code{lcr} and @code{lr}).
+
+@item A
+Register in the class @code{QUAD_ACC_REGS} (@code{acc0} to @code{acc7}).
+
+@item B
+Register in the class @code{ACCG_REGS} (@code{accg0} to @code{accg7}).
+
+@item C
+Register in the class @code{CR_REGS} (@code{cc0} to @code{cc7}).
+
+@item G
+Floating point constant zero
  
  @item I
-Signed 16-bit constant (for arithmetic instructions)
+6-bit signed integer constant
  
  @item J
-Zero
-
-@item K
-Zero-extended 16-bit constant (for logic instructions)
+10-bit signed integer constant
  
  @item L
-Constant with low 16 bits zero (can be loaded with @code{lui})
+16-bit signed integer constant
  
  @item M
-32-bit constant which requires two instructions to load (a constant
-which is not @samp{I}, @samp{K}, or @samp{L})
+16-bit unsigned integer constant
  
  @item N
-Negative 16-bit constant
+12-bit signed integer constant that is negative---i.e.@: in the
+range of @minus{}2048 to @minus{}1
  
  @item O
-Exact power of two
+Constant zero
  
  @item P
-Positive 16-bit constant
-
-@item G
-Floating point zero
+12-bit signed integer constant that is greater than zero---i.e.@: in the
+range of 1 to 2047.
  
-@item Q
-Memory reference that can be loaded with more than one instruction
-(@samp{m} is preferable for @code{asm} statements)
-
-@item R
-Memory reference that can be loaded with one instruction
-(@samp{m} is preferable for @code{asm} statements)
-
-@item S
-Memory reference in external OSF/rose PIC format
-(@samp{m} is preferable for @code{asm} statements)
  @end table
  
  @item IP2K---@file{ip2k.h}
@@ -1878,6 +1859,70 @@ Zero
  Integers from 0 to 255
  @end table
  
+@item MIPS---@file{mips.h}
+@table @code
+@item d
+General-purpose integer register
+
+@item f
+Floating-point register (if available)
+
+@item h
+@samp{Hi} register
+
+@item l
+@samp{Lo} register
+
+@item x
+@samp{Hi} or @samp{Lo} register
+
+@item y
+General-purpose integer register
+
+@item z
+Floating-point status register
+
+@item I
+Signed 16-bit constant (for arithmetic instructions)
+
+@item J
+Zero
+
+@item K
+Zero-extended 16-bit constant (for logic instructions)
+
+@item L
+Constant with low 16 bits zero (can be loaded with @code{lui})
+
+@item M
+32-bit constant which requires two instructions to load (a constant
+which is not @samp{I}, @samp{K}, or @samp{L})
+
+@item N
+Negative 16-bit constant
+
+@item O
+Exact power of two
+
+@item P
+Positive 16-bit constant
+
+@item G
+Floating point zero
+
+@item Q
+Memory reference that can be loaded with more than one instruction
+(@samp{m} is preferable for @code{asm} statements)
+
+@item R
+Memory reference that can be loaded with one instruction
+(@samp{m} is preferable for @code{asm} statements)
+
+@item S
+Memory reference in external OSF/rose PIC format
+(@samp{m} is preferable for @code{asm} statements)
+@end table
+
  @item Motorola 680x0---@file{m68k.h}
  @table @code
  @item a
@@ -2395,8 +2440,7 @@ Write the generated insn as a @code{parallel} with elements being a
  @code{set} of one register from the appropriate memory location (you may
  also need @code{use} or @code{clobber} elements).  Use a
  @code{match_parallel} (@pxref{RTL Template}) to recognize the insn.  See
-@file{a29k.md} and @file{rs6000.md} for examples of the use of this insn
-pattern.
+@file{rs6000.md} for examples of the use of this insn pattern.
  
  @cindex @samp{store_multiple} instruction pattern
  @item @samp{store_multiple}
@@ -2407,7 +2451,7 @@ operand 2 is a constant: the number of consecutive registers.
  
  @cindex @code{push@var{m}} instruction pattern
  @item @samp{push@var{m}}
-Output an push instruction.  Operand 0 is value to push.  Used only when
+Output a push instruction.  Operand 0 is value to push.  Used only when
  @code{PUSH_ROUNDING} is defined.  For historical reason, this pattern may be
  missing and in such case an @code{mov} expander is used instead, with a
  @code{MEM} expression forming the push operation.  The @code{mov} expander
@@ -2523,7 +2567,111 @@ Store the absolute value of operand 1 into operand 0.
  Store the square root of operand 1 into operand 0.
  
  The @code{sqrt} built-in function of C always uses the mode which
-corresponds to the C data type @code{double}.
+corresponds to the C data type @code{double} and the @code{sqrtf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{cos@var{m}2} instruction pattern
+@item @samp{cos@var{m}2}
+Store the cosine of operand 1 into operand 0.
+
+The @code{cos} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{cosf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{sin@var{m}2} instruction pattern
+@item @samp{sin@var{m}2}
+Store the sine of operand 1 into operand 0.
+
+The @code{sin} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{sinf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{exp@var{m}2} instruction pattern
+@item @samp{exp@var{m}2}
+Store the exponential of operand 1 into operand 0.
+
+The @code{exp} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{expf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{log@var{m}2} instruction pattern
+@item @samp{log@var{m}2}
+Store the natural logarithm of operand 1 into operand 0.
+
+The @code{log} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{logf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{pow@var{m}3} instruction pattern
+@item @samp{pow@var{m}3}
+Store the value of operand 1 raised to the exponent operand 2
+into operand 0.
+
+The @code{pow} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{powf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{atan2@var{m}3} instruction pattern
+@item @samp{atan2@var{m}3}
+Store the arc tangent (inverse tangent) of operand 1 divided by
+operand 2 into operand 0, using the signs of both arguments to
+determine the quadrant of the result.
+
+The @code{atan2} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{atan2f}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{floor@var{m}2} instruction pattern
+@item @samp{floor@var{m}2}
+Store the largest integral value not greater than argument.
+
+The @code{floor} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{floorf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{trunc@var{m}2} instruction pattern
+@item @samp{trunc@var{m}2}
+Store the argument rounded to integer towards zero.
+
+The @code{trunc} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{truncf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{round@var{m}2} instruction pattern
+@item @samp{round@var{m}2}
+Store the argument rounded to the nearest integer, with halfway cases
+rounded away from zero.
+
+The @code{round} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{roundf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{ceil@var{m}2} instruction pattern
+@item @samp{ceil@var{m}2}
+Store the argument rounded to integer towards positive infinity.
+
+The @code{ceil} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{ceilf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
+
+@cindex @code{nearbyint@var{m}2} instruction pattern
+@item @samp{nearbyint@var{m}2}
+Store the argument rounded according to the default rounding mode.
+
+The @code{nearbyint} built-in function of C always uses the mode which
+corresponds to the C data type @code{double} and the @code{nearbyintf}
+built-in function uses the mode which corresponds to the C data
+type @code{float}.
  
  @cindex @code{ffs@var{m}2} instruction pattern
  @item @samp{ffs@var{m}2}
@@ -2536,6 +2684,36 @@ generating the instruction.
  The @code{ffs} built-in function of C always uses the mode which
  corresponds to the C data type @code{int}.
  
+@cindex @code{clz@var{m}2} instruction pattern
+@item @samp{clz@var{m}2}
+Store into operand 0 the number of leading 0-bits in @var{x}, starting
+at the most significant bit position.  If @var{x} is 0, the result is
+undefined.  @var{m} is the mode of operand 0; operand 1's mode is
+specified by the instruction pattern, and the compiler will convert the
+operand to that mode before generating the instruction.
+
+@cindex @code{ctz@var{m}2} instruction pattern
+@item @samp{ctz@var{m}2}
+Store into operand 0 the number of trailing 0-bits in @var{x}, starting
+at the least significant bit position.  If @var{x} is 0, the result is
+undefined.  @var{m} is the mode of operand 0; operand 1's mode is
+specified by the instruction pattern, and the compiler will convert the
+operand to that mode before generating the instruction.
+
+@cindex @code{popcount@var{m}2} instruction pattern
+@item @samp{popcount@var{m}2}
+Store into operand 0 the number of 1-bits in @var{x}.  @var{m} is the
+mode of operand 0; operand 1's mode is specified by the instruction
+pattern, and the compiler will convert the operand to that mode before
+generating the instruction.
+
+@cindex @code{parity@var{m}2} instruction pattern
+@item @samp{parity@var{m}2}
+Store into operand 0 the parity of @var{x}, i.e.@: the number of 1-bits
+in @var{x} modulo 2.  @var{m} is the mode of operand 0; operand 1's mode
+is specified by the instruction pattern, and the compiler will convert
+the operand to that mode before generating the instruction.
+
  @cindex @code{one_cmpl@var{m}2} instruction pattern
  @item @samp{one_cmpl@var{m}2}
  Store the bitwise-complement of operand 1 into operand 0.
@@ -2727,6 +2905,13 @@ codes and vice versa.
  If the machine does not have conditional move instructions, do not
  define these patterns.
  
+@cindex @code{add@var{mode}cc} instruction pattern
+@item @samp{add@var{mode}cc}
+Similar to @samp{mov@var{mode}cc} but for conditional addition.  Conditionally
+move operand 2 or (operand 2 + operand 3) into operand 0 according to the
+comparison in operand 1.  If the comparison is false, operand 2 is moved into
+operand 0, otherwise (operand 2 + operand 3) is moved.
+
  @cindex @code{s@var{cond}} instruction pattern
  @item @samp{s@var{cond}}
  Store zero or nonzero in the operand according to the condition codes.
@@ -3442,7 +3627,7 @@ multiple condition registers, use a pseudo register.
  @findex next_cc0_user
  On some machines, the type of branch instruction generated may depend on
  the way the condition code was produced; for example, on the 68k and
-Sparc, setting the condition code directly from an add or subtract
+SPARC, setting the condition code directly from an add or subtract
  instruction does not clear the overflow bit the way that a test
  instruction does, so a different branch instruction must be used for
  some conditional branches.  For machines that use @code{(cc0)}, the set
@@ -3461,7 +3646,7 @@ different formats of the condition code register.
  Registers used to store the condition code value should have a mode that
  is in class @code{MODE_CC}.  Normally, it will be @code{CCmode}.  If
  additional modes are required (as for the add example mentioned above in
-the Sparc), define the macro @code{EXTRA_CC_MODES} to list the
+the SPARC), define the macro @code{EXTRA_CC_MODES} to list the
  additional modes required (@pxref{Condition Code}).  Also define
  @code{SELECT_CC_MODE} to choose a mode given an operand of a compare.
  
@@ -3473,7 +3658,7 @@ be specified at that time.
  If the cases that require different modes would be made by instruction
  combination, the macro @code{SELECT_CC_MODE} determines which machine
  mode should be used for the comparison result.  The patterns should be
-written using that mode.  To support the case of the add on the Sparc
+written using that mode.  To support the case of the add on the SPARC
  discussed above, we have the pattern
  
  @smallexample
@@ -3487,7 +3672,7 @@ discussed above, we have the pattern
    "@dots{}")
  @end smallexample
  
-The @code{SELECT_CC_MODE} macro on the Sparc returns @code{CC_NOOVmode}
+The @code{SELECT_CC_MODE} macro on the SPARC returns @code{CC_NOOVmode}
  for comparisons whose argument is a @code{plus}.
  
  @node Looping Patterns
@@ -3495,7 +3680,7 @@ for comparisons whose argument is a @code{plus}.
  @cindex looping instruction patterns
  @cindex defining looping instruction patterns
  
-Some machines have special jump instructions that can be utilised to
+Some machines have special jump instructions that can be utilized to
  make loops more efficient.  A common example is the 68000 @samp{dbra}
  instruction which performs a decrement of a register and a branch if the
  result was greater than zero.  Other machines, in particular digital
@@ -3626,6 +3811,14 @@ For these operators, if only one operand is a @code{neg}, @code{not},
  @code{mult}, @code{plus}, or @code{minus} expression, it will be the
  first operand.
  
+@item
+In combinations of @code{neg}, @code{mult}, @code{plus}, and
+@code{minus}, the @code{neg} operations (if any) will be moved inside
+the operations as far as possible.  For instance, 
+@code{(neg (mult A B))} is canonicalized as @code{(mult (neg A) B)}, but
+@code{(plus (mult (neg B) C) A)} is canonicalized as
+@code{(minus A (mult B C))}.
+
  @cindex @code{compare}, canonicalization of
  @item
  For the @code{compare} operator, a constant is always the second operand
@@ -4187,7 +4380,7 @@ For example:
  
  @end smallexample
  
-Where @var{pathname} is a string that specifies the the location of the file,
+Where @var{pathname} is a string that specifies the location of the file,
  specifies the include file to be in @file{gcc/config/target/filestuff}. The
  directory @file{gcc/config/target} is regarded as the default directory.
  
@@ -5208,12 +5401,12 @@ branch is true, we might represent this as follows:
  @cindex RISC
  @cindex VLIW
  
-To achieve better productivity most modern processors
+To achieve better performance, most modern processors
  (super-pipelined, superscalar @acronym{RISC}, and @acronym{VLIW}
  processors) have many @dfn{functional units} on which several
  instructions can be executed simultaneously.  An instruction starts
  execution if its issue conditions are satisfied.  If not, the
-instruction is interlocked until its conditions are satisfied.  Such
+instruction is stalled until its conditions are satisfied.  Such
  @dfn{interlock (pipeline) delay} causes interruption of the fetching
  of successor instructions (or demands nop instructions, e.g. for some
  MIPS processors).
@@ -5223,7 +5416,7 @@ The first one is a data dependence delay determining @dfn{instruction
  latency time}.  The instruction execution is not started until all
  source data have been evaluated by prior instructions (there are more
  complex cases when the instruction execution starts even when the data
-are not availaible but will be ready in given time after the
+are not available but will be ready in given time after the
  instruction execution start).  Taking the data dependence delays into
  account is simple.  The data dependence (true, output, and
  anti-dependence) delay between two instructions is given by a
@@ -5236,25 +5429,25 @@ of delay into account is complex especially for modern @acronym{RISC}
  processors.
  
  The task of exploiting more processor parallelism is solved by an
-instruction scheduler.  For better solution of this problem, the
+instruction scheduler.  For a better solution to this problem, the
  instruction scheduler has to have an adequate description of the
-processor parallelism (or @dfn{pipeline description}).  Currently GCC
-has two ways to describe processor parallelism.  The first one is old
-and originated from instruction scheduler written by Michael Tiemann
-and described in the first subsequent section.  The second one was
-created later.  It is based on description of functional unit
-reservations by processor instructions with the aid of @dfn{regular
-expressions}.  This is so called @dfn{automaton based description}.
-
-Gcc instruction scheduler uses a @dfn{pipeline hazard recognizer} to
+processor parallelism (or @dfn{pipeline description}).  Currently GCC 
+provides two alternative ways to describe processor parallelism,
+both described below.  The first method is outlined in the next section;
+it was once the only method provided by GCC, and thus is used in a number
+of existing ports.  The second, and preferred method, specifies functional
+unit reservations for groups of instructions with the aid of @dfn{regular
+expressions}.  This is called the @dfn{automaton based description}.   
+
+The GCC instruction scheduler uses a @dfn{pipeline hazard recognizer} to
  figure out the possibility of the instruction issue by the processor
-on given simulated processor cycle.  The pipeline hazard recognizer is
-a code generated from the processor pipeline description.  The
+on a given simulated processor cycle.  The pipeline hazard recognizer is
+automatically generated from the processor pipeline description.  The
  pipeline hazard recognizer generated from the automaton based
-description is more sophisticated and based on deterministic finite
+description is more sophisticated and based on a deterministic finite
  state automaton (@acronym{DFA}) and therefore faster than one
-generated from the old description.  Also its speed is not depended on
-processor complexity.  The instruction issue is possible if there is
+generated from the old description.  Furthermore, its speed is not dependent
+on processor complexity.  The instruction issue is possible if there is
  a transition from one automaton state to another one.
  
  You can use any model to describe processor pipeline characteristics
@@ -5412,7 +5605,7 @@ in the machine description file is not important.
  The following optional construction describes names of automata
  generated and used for the pipeline hazards recognition.  Sometimes
  the generated finite state automaton used by the pipeline hazard
-recognizer is large.  If we use more one automaton and bind functional
+recognizer is large.  If we use more than one automaton and bind functional
  units to the automata, the summary size of the automata usually is
  less than the size of the single automaton.  If there is no one such
  construction, only one finite state automaton is generated.
@@ -5439,21 +5632,28 @@ reservations should be described by the following construction.
  separated by commas.  Don't use name @samp{nothing}, it is reserved
  for other goals.
  
-@var{automaton-name} is a string giving the name of automaton with
+@var{automaton-name} is a string giving the name of the automaton with
  which the unit is bound.  The automaton should be described in
  construction @code{define_automaton}.  You should give
  @dfn{automaton-name}, if there is a defined automaton.
  
+The assignment of units to automata is constrained by the uses of the
+units in insn reservations.  The most important constraint is: if a
+unit reservation is present on a particular cycle of an alternative
+for an insn reservation, then some unit from the same automaton must
+be present on the same cycle for the other alternatives of the insn
+reservation.  The rest of the constraints are mentioned in the
+description of the subsequent constructions.
+
  @findex define_query_cpu_unit
  @cindex querying function unit reservations
  The following construction describes CPU functional units analogously
-to @code{define_cpu_unit}.  If we use automata without their
-minimization, the reservation of such units can be queried for an
-automaton state.  The instruction scheduler never queries reservation
-of functional units for given automaton state.  So as a rule, you
-don't need this construction.  This construction could be used for
-future code generation goals (e.g. to generate @acronym{VLIW} insn
-templates).
+to @code{define_cpu_unit}.  The reservation of such units can be
+queried for an automaton state.  The instruction scheduler never
+queries reservation of functional units for given automaton state.  So
+as a rule, you don't need this construction.  This construction could
+be used for future code generation goals (e.g. to generate
+@acronym{VLIW} insn templates).
  
  @smallexample
  (define_query_cpu_unit @var{unit-names} [@var{automaton-name}])
@@ -5462,14 +5662,14 @@ templates).
  @var{unit-names} is a string giving names of the functional units
  separated by commas.
  
-@var{automaton-name} is a string giving name of the automaton with
+@var{automaton-name} is a string giving the name of the automaton with
  which the unit is bound.
  
  @findex define_insn_reservation
  @cindex instruction latency time
  @cindex regular expressions
  @cindex data bypass
-The following construction is major one to describe pipeline
+The following construction is the major one to describe pipeline
  characteristics of an instruction.
  
  @smallexample
@@ -5481,18 +5681,18 @@ characteristics of an instruction.
  instruction.  There is an important difference between the old
  description and the automaton based pipeline description.  The latency
  time is used for all dependencies when we use the old description.  In
-the automaton based pipeline description, given latency time is used
-only for true dependencies.  The cost of anti-dependencies is always
+the automaton based pipeline description, the given latency time is only
+used for true dependencies.  The cost of anti-dependencies is always
  zero and the cost of output dependencies is the difference between
  latency times of the producing and consuming insns (if the difference
-is negative, the cost is considered to be zero).  You always can
-change the default costs for any description by using target hook
+is negative, the cost is considered to be zero).  You can always
+change the default costs for any description by using the target hook
  @code{TARGET_SCHED_ADJUST_COST} (@pxref{Scheduling}).
  
-@var{insn-names} is a string giving internal name of the insn.  The
+@var{insn-names} is a string giving the internal name of the insn.  The
  internal names are used in constructions @code{define_bypass} and in
  the automaton description file generated for debugging.  The internal
-name has nothing common with the names in @code{define_insn}.  It is a
+name has nothing in common with the names in @code{define_insn}.  It is a
  good practice to use insn classes described in the processor manual.
  
  @var{condition} defines what RTL insns are described by this
@@ -5507,7 +5707,7 @@ contain @code{symbol_ref}).  It is also not checked during the
  pipeline hazard recognizer work because it would slow down the
  recognizer considerably.
  
-@var{regexp} is a string describing reservation of the cpu functional
+@var{regexp} is a string describing the reservation of the cpu's functional
  units by the instruction.  The reservations are described by a regular
  expression according to the following syntax:
  
@@ -5593,16 +5793,18 @@ given in string @var{out_insn_names} will be ready for the
  instructions given in string @var{in_insn_names}.  The instructions in
  the string are separated by commas.
  
-@var{guard} is an optional string giving name of a C function which
+@var{guard} is an optional string giving the name of a C function which
  defines an additional guard for the bypass.  The function will get the
  two insns as parameters.  If the function returns zero the bypass will
  be ignored for this case.  The additional guard is necessary to
-recognize complicated bypasses, e.g. when consumer is only an address
+recognize complicated bypasses, e.g. when the consumer is only an address
  of insn @samp{store} (not a stored value).
  
  @findex exclusion_set
  @findex presence_set
+@findex final_presence_set
  @findex absence_set
+@findex final_absence_set
  @cindex VLIW
  @cindex RISC
  Usually the following three constructions are used to describe
@@ -5612,13 +5814,19 @@ used for @acronym{RISC} processors too.
  
  @smallexample
  (exclusion_set @var{unit-names} @var{unit-names})
-(presence_set @var{unit-names} @var{unit-names})
-(absence_set @var{unit-names} @var{unit-names})
+(presence_set @var{unit-names} @var{patterns})
+(final_presence_set @var{unit-names} @var{patterns})
+(absence_set @var{unit-names} @var{patterns})
+(final_absence_set @var{unit-names} @var{patterns})
  @end smallexample
  
  @var{unit-names} is a string giving names of functional units
  separated by commas.
  
+@var{patterns} is a string giving patterns of functional units
+separated by commas.  Currently a pattern is one unit or units
+separated by white-spaces.
+
  The first construction (@samp{exclusion_set}) means that each
  functional unit in the first string can not be reserved simultaneously
  with a unit whose name is in the second string and vice versa.  For
@@ -5629,22 +5837,75 @@ point insns or only double floating point insns.
  
  The second construction (@samp{presence_set}) means that each
  functional unit in the first string can not be reserved unless at
-least one of units whose names are in the second string is reserved.
-This is an asymmetric relation.  For example, it is useful for
-description that @acronym{VLIW} @samp{slot1} is reserved after
-@samp{slot0} reservation.
-
-The third construction (@samp{absence_set}) means that each functional
-unit in the first string can be reserved only if each unit whose name
-is in the second string is not reserved.  This is an asymmetric
-relation (actually @samp{exclusion_set} is analogous to this one but
-it is symmetric).  For example, it is useful for description that
-@acronym{VLIW} @samp{slot0} can not be reserved after @samp{slot1} or
-@samp{slot2} reservation.
-
-All functional units mentioned in a set should belong the same
+least one of the patterns of units whose names are in the second string is
+reserved.  This is an asymmetric relation.  For example, it is useful
+for description that @acronym{VLIW} @samp{slot1} is reserved after
+@samp{slot0} reservation.  We could describe it by the following
+construction
+
+@smallexample
+(presence_set "slot1" "slot0")
+@end smallexample
+
+Or @samp{slot1} is reserved only after @samp{slot0} and unit @samp{b0}
+reservation.  In this case we could write
+
+@smallexample
+(presence_set "slot1" "slot0 b0")
+@end smallexample
+
+The third construction (@samp{final_presence_set}) is analogous to
+@samp{presence_set}.  The difference between them is when checking is
+done.  When an instruction is issued in a given automaton state
+reflecting all current and planned unit reservations, the automaton
+state is changed.  The first state is a source state, the second one
+is a result state.  Checking for @samp{presence_set} is done on the
+source state reservation, checking for @samp{final_presence_set} is
+done on the result reservation.  This construction is useful to
+describe a reservation which is actually two subsequent reservations.
+For example, if we use
+
+@smallexample
+(presence_set "slot1" "slot0")
+@end smallexample
+
+the following insn will never be issued (because @samp{slot1} requires
+@samp{slot0} which is absent in the source state).
+
+@smallexample
+(define_reservation "insn_and_nop" "slot0 + slot1")
+@end smallexample
+
+but it can be issued if we use analogous @samp{final_presence_set}.
+
+The fourth construction (@samp{absence_set}) means that each functional
+unit in the first string can be reserved only if each pattern of units
+whose names are in the second string is not reserved.  This is an
+asymmetric relation (actually @samp{exclusion_set} is analogous to
+this one but it is symmetric).  For example, it is useful for
+description that @acronym{VLIW} @samp{slot0} can not be reserved after
+@samp{slot1} or @samp{slot2} reservation.  We could describe it by the
+following construction
+
+@smallexample
+(absence_set "slot2" "slot0, slot1")
+@end smallexample
+
+Or @samp{slot2} can not be reserved if @samp{slot0} and unit @samp{b0}
+are reserved or @samp{slot1} and unit @samp{b1} are reserved.  In
+this case we could write
+
+@smallexample
+(absence_set "slot2" "slot0 b0, slot1 b1")
+@end smallexample
+
+All functional units mentioned in a set should belong to the same
  automaton.
  
+The last construction (@samp{final_absence_set}) is analogous to
+@samp{absence_set} but checking is done on the result (state)
+reservation.  See comments for @samp{final_presence_set}.
+
  @findex automata_option
  @cindex deterministic finite state automaton
  @cindex nondeterministic finite state automaton
@@ -5662,8 +5923,8 @@ code.  Currently there are the following options:
  @itemize @bullet
  @item
  @dfn{no-minimization} makes no minimization of the automaton.  This is
-only worth to do when we are going to query CPU functional unit
-reservations in an automaton state.
+only worth doing when we are debugging the description and need to
+look more accurately at reservations of states.
  
  @item
  @dfn{time} means printing additional time statistics about
@@ -5696,7 +5957,7 @@ the following functional units.
  
  @smallexample
  (define_cpu_unit "i0_pipeline, i1_pipeline, f_pipeline")
-(define_cpu_unit "port_0, port1")
+(define_cpu_unit "port0, port1")
  @end smallexample
  
  All simple integer insns can be executed in any integer pipeline and
@@ -5708,26 +5969,26 @@ pipeline and their results are ready correspondingly in 8 and 4
  cycles.  The integer division is not pipelined, i.e. the subsequent
  integer division insn can not be issued until the current division
  insn finished.  Floating point insns are fully pipelined and their
-results are ready in 3 cycles.  There is also additional one cycle
-delay in the usage by integer insns of result produced by floating
-point insns.  To describe all of this we could specify
+results are ready in 3 cycles.  Where the result of a floating point
+insn is used by an integer insn, an additional delay of one cycle is
+incurred.  To describe all of this we could specify
  
  @smallexample
  (define_cpu_unit "div")
  
  (define_insn_reservation "simple" 2 (eq_attr "cpu" "int")
-                         "(i0_pipeline | i1_pipeline), (port_0 | port1)")
+                         "(i0_pipeline | i1_pipeline), (port0 | port1)")
  
  (define_insn_reservation "mult" 4 (eq_attr "cpu" "mult")
-                         "i1_pipeline, nothing*2, (port_0 | port1)")
+                         "i1_pipeline, nothing*2, (port0 | port1)")
  
  (define_insn_reservation "div" 8 (eq_attr "cpu" "div")
-                         "i1_pipeline, div*7, div + (port_0 | port1)")
+                         "i1_pipeline, div*7, div + (port0 | port1)")
  
  (define_insn_reservation "float" 3 (eq_attr "cpu" "float")
-                         "f_pipeline, nothing, (port_0 | port1))
+                         "f_pipeline, nothing, (port0 | port1)")
  
-(define_bypass 4 "float" "simple,mut,div")
+(define_bypass 4 "float" "simple,mult,div")
  @end smallexample
  
  To simplify the description we could describe the following reservation
@@ -5783,17 +6044,18 @@ The interface to the pipeline hazard recognizer is more complex than
  one to the automaton based pipeline recognizer.
  
  @item
-An unnatural description when you write an unit and a condition which
+An unnatural description when you write a unit and a condition which
  selects instructions using the unit.  Writing all unit reservations
  for an instruction (an instruction class) is more natural.
  
  @item
-The recognition of the interlock delays has slow implementation.  GCC
+The recognition of the interlock delays has a slow implementation.  The GCC
  scheduler supports structures which describe the unit reservations.
-The more processor has functional units, the slower pipeline hazard
-recognizer.  Such implementation would become slower when we enable to
+The more functional units a processor has, the slower its pipeline hazard
+recognizer will be.  Such an implementation would become even slower if we
+allowed it to
  reserve functional units not only at the instruction execution start.
-The automaton based pipeline hazard recognizer speed is not depended
+In an automaton based pipeline hazard recognizer, speed is not dependent
  on processor complexity.
  @end itemize