libquadmath/printf/mul_n.c

   1 /* mpn_mul_n -- Multiply two natural numbers of length n.
   2
   3 Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
   4
   5 This file is part of the GNU MP Library.
   6
   7 The GNU MP Library is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU Lesser General Public License as published by
   9 the Free Software Foundation; either version 2.1 of the License, or (at your
  10 option) any later version.
  11
  12 The GNU MP Library is distributed in the hope that it will be useful, but
  13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  14 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  15 License for more details.
  16
  17 You should have received a copy of the GNU Lesser General Public License
  18 along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  20 MA 02111-1307, USA. */
  21
  22 #include "gmp-impl.h"
  23
  24 /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
  25    both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
  26    always stored.  Return the most significant limb.
  27
  28    Argument constraints:
  29    1. PRODP != UP and PRODP != VP, i.e. the destination
  30       must be distinct from the multiplier and the multiplicand.  */
  31
  32 /* If KARATSUBA_THRESHOLD is not already defined, define it to a
  33    value which is good on most machines.  */
  34 #ifndef KARATSUBA_THRESHOLD
  35 #define KARATSUBA_THRESHOLD 32
  36 #endif
  37
  38 /* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
  39 #if KARATSUBA_THRESHOLD < 2
  40 #undef KARATSUBA_THRESHOLD
  41 #define KARATSUBA_THRESHOLD 2
  42 #endif
  43
  44 /* Handle simple cases with traditional multiplication.
  45
  46    This is the most critical code of multiplication.  All multiplies rely
  47    on this, both small and huge.  Small ones arrive here immediately.  Huge
  48    ones arrive here as this is the base case for Karatsuba's recursive
  49    algorithm below.  */
  50
  51 void
  52 #if __STDC__
  53 impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
  54 #else
  55 impn_mul_n_basecase (prodp, up, vp, size)
  56      mp_ptr prodp;
  57      mp_srcptr up;
  58      mp_srcptr vp;
  59      mp_size_t size;
  60 #endif
  61 {
  62   mp_size_t i;
  63   mp_limb_t cy_limb;
  64   mp_limb_t v_limb;
  65
  66   /* Multiply by the first limb in V separately, as the result can be
  67      stored (not added) to PROD.  We also avoid a loop for zeroing.  */
  68   v_limb = vp[0];
  69   if (v_limb <= 1)
  70     {
  71       if (v_limb == 1)
  72         MPN_COPY (prodp, up, size);
  73       else
  74         MPN_ZERO (prodp, size);
  75       cy_limb = 0;
  76     }
  77   else
  78     cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
  79
  80   prodp[size] = cy_limb;
  81   prodp++;
  82
  83   /* For each iteration in the outer loop, multiply one limb from
  84      U with one limb from V, and add it to PROD.  */
  85   for (i = 1; i < size; i++)
  86     {
  87       v_limb = vp[i];
  88       if (v_limb <= 1)
  89         {
  90           cy_limb = 0;
  91           if (v_limb == 1)
  92             cy_limb = mpn_add_n (prodp, prodp, up, size);
  93         }
  94       else
  95         cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
  96
  97       prodp[size] = cy_limb;
  98       prodp++;
  99     }
 100 }
 101
 102 void
 103 #if __STDC__
 104 impn_mul_n (mp_ptr prodp,
 105              mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
 106 #else
 107 impn_mul_n (prodp, up, vp, size, tspace)
 108      mp_ptr prodp;
 109      mp_srcptr up;
 110      mp_srcptr vp;
 111      mp_size_t size;
 112      mp_ptr tspace;
 113 #endif
 114 {
 115   if ((size & 1) != 0)
 116     {
 117       /* The size is odd, the code code below doesn't handle that.
 118          Multiply the least significant (size - 1) limbs with a recursive
 119          call, and handle the most significant limb of S1 and S2
 120          separately.  */
 121       /* A slightly faster way to do this would be to make the Karatsuba
 122          code below behave as if the size were even, and let it check for
 123          odd size in the end.  I.e., in essence move this code to the end.
 124          Doing so would save us a recursive call, and potentially make the
 125          stack grow a lot less.  */
 126
 127       mp_size_t esize = size - 1;       /* even size */
 128       mp_limb_t cy_limb;
 129
 130       MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
 131       cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
 132       prodp[esize + esize] = cy_limb;
 133       cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
 134
 135       prodp[esize + size] = cy_limb;
 136     }
 137   else
 138     {
 139       /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
 140
 141          Split U in two pieces, U1 and U0, such that
 142          U = U0 + U1*(B**n),
 143          and V in V1 and V0, such that
 144          V = V0 + V1*(B**n).
 145
 146          UV is then computed recursively using the identity
 147
 148                 2n   n          n                     n
 149          UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
 150                         1 1        1  0   0  1              0 0
 151
 152          Where B = 2**BITS_PER_MP_LIMB.  */
 153
 154       mp_size_t hsize = size >> 1;
 155       mp_limb_t cy;
 156       int negflg;
 157
 158       /*** Product H.    ________________  ________________
 159                         |_____U1 x V1____||____U0 x V0_____|  */
 160       /* Put result in upper part of PROD and pass low part of TSPACE
 161          as new TSPACE.  */
 162       MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);
 163
 164       /*** Product M.    ________________
 165                         |_(U1-U0)(V0-V1)_|  */
 166       if (mpn_cmp (up + hsize, up, hsize) >= 0)
 167         {
 168           mpn_sub_n (prodp, up + hsize, up, hsize);
 169           negflg = 0;
 170         }
 171       else
 172         {
 173           mpn_sub_n (prodp, up, up + hsize, hsize);
 174           negflg = 1;
 175         }
 176       if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
 177         {
 178           mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
 179           negflg ^= 1;
 180         }
 181       else
 182         {
 183           mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
 184           /* No change of NEGFLG.  */
 185         }
 186       /* Read temporary operands from low part of PROD.
 187          Put result in low part of TSPACE using upper part of TSPACE
 188          as new TSPACE.  */
 189       MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);
 190
 191       /*** Add/copy product H.  */
 192       MPN_COPY (prodp + hsize, prodp + size, hsize);
 193       cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
 194
 195       /*** Add product M (if NEGFLG M is a negative number).  */
 196       if (negflg)
 197         cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
 198       else
 199         cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
 200
 201       /*** Product L.    ________________  ________________
 202                         |________________||____U0 x V0_____|  */
 203       /* Read temporary operands from low part of PROD.
 204          Put result in low part of TSPACE using upper part of TSPACE
 205          as new TSPACE.  */
 206       MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);
 207
 208       /*** Add/copy Product L (twice).  */
 209
 210       cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
 211       if (cy)
 212         mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
 213
 214       MPN_COPY (prodp, tspace, hsize);
 215       cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
 216       if (cy)
 217         mpn_add_1 (prodp + size, prodp + size, size, 1);
 218     }
 219 }