libgo/go/big/nat.go

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // Package big implements multi-precision arithmetic (big numbers).
   6 // The following numeric types are supported:
   7 //
   8 //      - Int   signed integers
   9 //      - Rat   rational numbers
  10 //
  11 // All methods on Int take the result as the receiver; if it is one
  12 // of the operands it may be overwritten (and its memory reused).
  13 // To enable chaining of operations, the result is also returned.
  14 //
  15 package big
  16
  17 // This file contains operations on unsigned multi-precision integers.
  18 // These are the building blocks for the operations on signed integers
  19 // and rationals.
  20
  21 import (
  22         "errors"
  23         "io"
  24         "rand"
  25 )
  26
  27 // An unsigned integer x of the form
  28 //
  29 //   x = x[n-1]*_B^(n-1) + x[n-2]*_B^(n-2) + ... + x[1]*_B + x[0]
  30 //
  31 // with 0 <= x[i] < _B and 0 <= i < n is stored in a slice of length n,
  32 // with the digits x[i] as the slice elements.
  33 //
  34 // A number is normalized if the slice contains no leading 0 digits.
  35 // During arithmetic operations, denormalized values may occur but are
  36 // always normalized before returning the final result. The normalized
  37 // representation of 0 is the empty or nil slice (length = 0).
  38 //
  39 type nat []Word
  40
  41 var (
  42         natOne = nat{1}
  43         natTwo = nat{2}
  44         natTen = nat{10}
  45 )
  46
  47 func (z nat) clear() {
  48         for i := range z {
  49                 z[i] = 0
  50         }
  51 }
  52
  53 func (z nat) norm() nat {
  54         i := len(z)
  55         for i > 0 && z[i-1] == 0 {
  56                 i--
  57         }
  58         return z[0:i]
  59 }
  60
  61 func (z nat) make(n int) nat {
  62         if n <= cap(z) {
  63                 return z[0:n] // reuse z
  64         }
  65         // Choosing a good value for e has significant performance impact
  66         // because it increases the chance that a value can be reused.
  67         const e = 4 // extra capacity
  68         return make(nat, n, n+e)
  69 }
  70
  71 func (z nat) setWord(x Word) nat {
  72         if x == 0 {
  73                 return z.make(0)
  74         }
  75         z = z.make(1)
  76         z[0] = x
  77         return z
  78 }
  79
  80 func (z nat) setUint64(x uint64) nat {
  81         // single-digit values
  82         if w := Word(x); uint64(w) == x {
  83                 return z.setWord(w)
  84         }
  85
  86         // compute number of words n required to represent x
  87         n := 0
  88         for t := x; t > 0; t >>= _W {
  89                 n++
  90         }
  91
  92         // split x into n words
  93         z = z.make(n)
  94         for i := range z {
  95                 z[i] = Word(x & _M)
  96                 x >>= _W
  97         }
  98
  99         return z
 100 }
 101
 102 func (z nat) set(x nat) nat {
 103         z = z.make(len(x))
 104         copy(z, x)
 105         return z
 106 }
 107
 108 func (z nat) add(x, y nat) nat {
 109         m := len(x)
 110         n := len(y)
 111
 112         switch {
 113         case m < n:
 114                 return z.add(y, x)
 115         case m == 0:
 116                 // n == 0 because m >= n; result is 0
 117                 return z.make(0)
 118         case n == 0:
 119                 // result is x
 120                 return z.set(x)
 121         }
 122         // m > 0
 123
 124         z = z.make(m + 1)
 125         c := addVV(z[0:n], x, y)
 126         if m > n {
 127                 c = addVW(z[n:m], x[n:], c)
 128         }
 129         z[m] = c
 130
 131         return z.norm()
 132 }
 133
 134 func (z nat) sub(x, y nat) nat {
 135         m := len(x)
 136         n := len(y)
 137
 138         switch {
 139         case m < n:
 140                 panic("underflow")
 141         case m == 0:
 142                 // n == 0 because m >= n; result is 0
 143                 return z.make(0)
 144         case n == 0:
 145                 // result is x
 146                 return z.set(x)
 147         }
 148         // m > 0
 149
 150         z = z.make(m)
 151         c := subVV(z[0:n], x, y)
 152         if m > n {
 153                 c = subVW(z[n:], x[n:], c)
 154         }
 155         if c != 0 {
 156                 panic("underflow")
 157         }
 158
 159         return z.norm()
 160 }
 161
 162 func (x nat) cmp(y nat) (r int) {
 163         m := len(x)
 164         n := len(y)
 165         if m != n || m == 0 {
 166                 switch {
 167                 case m < n:
 168                         r = -1
 169                 case m > n:
 170                         r = 1
 171                 }
 172                 return
 173         }
 174
 175         i := m - 1
 176         for i > 0 && x[i] == y[i] {
 177                 i--
 178         }
 179
 180         switch {
 181         case x[i] < y[i]:
 182                 r = -1
 183         case x[i] > y[i]:
 184                 r = 1
 185         }
 186         return
 187 }
 188
 189 func (z nat) mulAddWW(x nat, y, r Word) nat {
 190         m := len(x)
 191         if m == 0 || y == 0 {
 192                 return z.setWord(r) // result is r
 193         }
 194         // m > 0
 195
 196         z = z.make(m + 1)
 197         z[m] = mulAddVWW(z[0:m], x, y, r)
 198
 199         return z.norm()
 200 }
 201
 202 // basicMul multiplies x and y and leaves the result in z.
 203 // The (non-normalized) result is placed in z[0 : len(x) + len(y)].
 204 func basicMul(z, x, y nat) {
 205         z[0 : len(x)+len(y)].clear() // initialize z
 206         for i, d := range y {
 207                 if d != 0 {
 208                         z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d)
 209                 }
 210         }
 211 }
 212
 213 // Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks.
 214 // Factored out for readability - do not use outside karatsuba.
 215 func karatsubaAdd(z, x nat, n int) {
 216         if c := addVV(z[0:n], z, x); c != 0 {
 217                 addVW(z[n:n+n>>1], z[n:], c)
 218         }
 219 }
 220
 221 // Like karatsubaAdd, but does subtract.
 222 func karatsubaSub(z, x nat, n int) {
 223         if c := subVV(z[0:n], z, x); c != 0 {
 224                 subVW(z[n:n+n>>1], z[n:], c)
 225         }
 226 }
 227
 228 // Operands that are shorter than karatsubaThreshold are multiplied using
 229 // "grade school" multiplication; for longer operands the Karatsuba algorithm
 230 // is used.
 231 var karatsubaThreshold int = 32 // computed by calibrate.go
 232
 233 // karatsuba multiplies x and y and leaves the result in z.
 234 // Both x and y must have the same length n and n must be a
 235 // power of 2. The result vector z must have len(z) >= 6*n.
 236 // The (non-normalized) result is placed in z[0 : 2*n].
 237 func karatsuba(z, x, y nat) {
 238         n := len(y)
 239
 240         // Switch to basic multiplication if numbers are odd or small.
 241         // (n is always even if karatsubaThreshold is even, but be
 242         // conservative)
 243         if n&1 != 0 || n < karatsubaThreshold || n < 2 {
 244                 basicMul(z, x, y)
 245                 return
 246         }
 247         // n&1 == 0 && n >= karatsubaThreshold && n >= 2
 248
 249         // Karatsuba multiplication is based on the observation that
 250         // for two numbers x and y with:
 251         //
 252         //   x = x1*b + x0
 253         //   y = y1*b + y0
 254         //
 255         // the product x*y can be obtained with 3 products z2, z1, z0
 256         // instead of 4:
 257         //
 258         //   x*y = x1*y1*b*b + (x1*y0 + x0*y1)*b + x0*y0
 259         //       =    z2*b*b +              z1*b +    z0
 260         //
 261         // with:
 262         //
 263         //   xd = x1 - x0
 264         //   yd = y0 - y1
 265         //
 266         //   z1 =      xd*yd                    + z1 + z0
 267         //      = (x1-x0)*(y0 - y1)             + z1 + z0
 268         //      = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z1 + z0
 269         //      = x1*y0 -    z1 -    z0 + x0*y1 + z1 + z0
 270         //      = x1*y0                 + x0*y1
 271
 272         // split x, y into "digits"
 273         n2 := n >> 1              // n2 >= 1
 274         x1, x0 := x[n2:], x[0:n2] // x = x1*b + y0
 275         y1, y0 := y[n2:], y[0:n2] // y = y1*b + y0
 276
 277         // z is used for the result and temporary storage:
 278         //
 279         //   6*n     5*n     4*n     3*n     2*n     1*n     0*n
 280         // z = [z2 copy|z0 copy| xd*yd | yd:xd | x1*y1 | x0*y0 ]
 281         //
 282         // For each recursive call of karatsuba, an unused slice of
 283         // z is passed in that has (at least) half the length of the
 284         // caller's z.
 285
 286         // compute z0 and z2 with the result "in place" in z
 287         karatsuba(z, x0, y0)     // z0 = x0*y0
 288         karatsuba(z[n:], x1, y1) // z2 = x1*y1
 289
 290         // compute xd (or the negative value if underflow occurs)
 291         s := 1 // sign of product xd*yd
 292         xd := z[2*n : 2*n+n2]
 293         if subVV(xd, x1, x0) != 0 { // x1-x0
 294                 s = -s
 295                 subVV(xd, x0, x1) // x0-x1
 296         }
 297
 298         // compute yd (or the negative value if underflow occurs)
 299         yd := z[2*n+n2 : 3*n]
 300         if subVV(yd, y0, y1) != 0 { // y0-y1
 301                 s = -s
 302                 subVV(yd, y1, y0) // y1-y0
 303         }
 304
 305         // p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0
 306         // p = (x0-x1)*(y0-y1) == x0*y0 - x0*y1 - x1*y0 + x1*y1 for s < 0
 307         p := z[n*3:]
 308         karatsuba(p, xd, yd)
 309
 310         // save original z2:z0
 311         // (ok to use upper half of z since we're done recursing)
 312         r := z[n*4:]
 313         copy(r, z)
 314
 315         // add up all partial products
 316         //
 317         //   2*n     n     0
 318         // z = [ z2  | z0  ]
 319         //   +    [ z0  ]
 320         //   +    [ z2  ]
 321         //   +    [  p  ]
 322         //
 323         karatsubaAdd(z[n2:], r, n)
 324         karatsubaAdd(z[n2:], r[n:], n)
 325         if s > 0 {
 326                 karatsubaAdd(z[n2:], p, n)
 327         } else {
 328                 karatsubaSub(z[n2:], p, n)
 329         }
 330 }
 331
 332 // alias returns true if x and y share the same base array.
 333 func alias(x, y nat) bool {
 334         return cap(x) > 0 && cap(y) > 0 && &x[0:cap(x)][cap(x)-1] == &y[0:cap(y)][cap(y)-1]
 335 }
 336
 337 // addAt implements z += x*(1<<(_W*i)); z must be long enough.
 338 // (we don't use nat.add because we need z to stay the same
 339 // slice, and we don't need to normalize z after each addition)
 340 func addAt(z, x nat, i int) {
 341         if n := len(x); n > 0 {
 342                 if c := addVV(z[i:i+n], z[i:], x); c != 0 {
 343                         j := i + n
 344                         if j < len(z) {
 345                                 addVW(z[j:], z[j:], c)
 346                         }
 347                 }
 348         }
 349 }
 350
 351 func max(x, y int) int {
 352         if x > y {
 353                 return x
 354         }
 355         return y
 356 }
 357
 358 // karatsubaLen computes an approximation to the maximum k <= n such that
 359 // k = p<<i for a number p <= karatsubaThreshold and an i >= 0. Thus, the
 360 // result is the largest number that can be divided repeatedly by 2 before
 361 // becoming about the value of karatsubaThreshold.
 362 func karatsubaLen(n int) int {
 363         i := uint(0)
 364         for n > karatsubaThreshold {
 365                 n >>= 1
 366                 i++
 367         }
 368         return n << i
 369 }
 370
 371 func (z nat) mul(x, y nat) nat {
 372         m := len(x)
 373         n := len(y)
 374
 375         switch {
 376         case m < n:
 377                 return z.mul(y, x)
 378         case m == 0 || n == 0:
 379                 return z.make(0)
 380         case n == 1:
 381                 return z.mulAddWW(x, y[0], 0)
 382         }
 383         // m >= n > 1
 384
 385         // determine if z can be reused
 386         if alias(z, x) || alias(z, y) {
 387                 z = nil // z is an alias for x or y - cannot reuse
 388         }
 389
 390         // use basic multiplication if the numbers are small
 391         if n < karatsubaThreshold || n < 2 {
 392                 z = z.make(m + n)
 393                 basicMul(z, x, y)
 394                 return z.norm()
 395         }
 396         // m >= n && n >= karatsubaThreshold && n >= 2
 397
 398         // determine Karatsuba length k such that
 399         //
 400         //   x = x1*b + x0
 401         //   y = y1*b + y0  (and k <= len(y), which implies k <= len(x))
 402         //   b = 1<<(_W*k)  ("base" of digits xi, yi)
 403         //
 404         k := karatsubaLen(n)
 405         // k <= n
 406
 407         // multiply x0 and y0 via Karatsuba
 408         x0 := x[0:k]              // x0 is not normalized
 409         y0 := y[0:k]              // y0 is not normalized
 410         z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y
 411         karatsuba(z, x0, y0)
 412         z = z[0 : m+n] // z has final length but may be incomplete, upper portion is garbage
 413
 414         // If x1 and/or y1 are not 0, add missing terms to z explicitly:
 415         //
 416         //     m+n       2*k       0
 417         //   z = [   ...   | x0*y0 ]
 418         //     +   [ x1*y1 ]
 419         //     +   [ x1*y0 ]
 420         //     +   [ x0*y1 ]
 421         //
 422         if k < n || m != n {
 423                 x1 := x[k:] // x1 is normalized because x is
 424                 y1 := y[k:] // y1 is normalized because y is
 425                 var t nat
 426                 t = t.mul(x1, y1)
 427                 copy(z[2*k:], t)
 428                 z[2*k+len(t):].clear() // upper portion of z is garbage
 429                 t = t.mul(x1, y0.norm())
 430                 addAt(z, t, k)
 431                 t = t.mul(x0.norm(), y1)
 432                 addAt(z, t, k)
 433         }
 434
 435         return z.norm()
 436 }
 437
 438 // mulRange computes the product of all the unsigned integers in the
 439 // range [a, b] inclusively. If a > b (empty range), the result is 1.
 440 func (z nat) mulRange(a, b uint64) nat {
 441         switch {
 442         case a == 0:
 443                 // cut long ranges short (optimization)
 444                 return z.setUint64(0)
 445         case a > b:
 446                 return z.setUint64(1)
 447         case a == b:
 448                 return z.setUint64(a)
 449         case a+1 == b:
 450                 return z.mul(nat{}.setUint64(a), nat{}.setUint64(b))
 451         }
 452         m := (a + b) / 2
 453         return z.mul(nat{}.mulRange(a, m), nat{}.mulRange(m+1, b))
 454 }
 455
 456 // q = (x-r)/y, with 0 <= r < y
 457 func (z nat) divW(x nat, y Word) (q nat, r Word) {
 458         m := len(x)
 459         switch {
 460         case y == 0:
 461                 panic("division by zero")
 462         case y == 1:
 463                 q = z.set(x) // result is x
 464                 return
 465         case m == 0:
 466                 q = z.make(0) // result is 0
 467                 return
 468         }
 469         // m > 0
 470         z = z.make(m)
 471         r = divWVW(z, 0, x, y)
 472         q = z.norm()
 473         return
 474 }
 475
 476 func (z nat) div(z2, u, v nat) (q, r nat) {
 477         if len(v) == 0 {
 478                 panic("division by zero")
 479         }
 480
 481         if u.cmp(v) < 0 {
 482                 q = z.make(0)
 483                 r = z2.set(u)
 484                 return
 485         }
 486
 487         if len(v) == 1 {
 488                 var rprime Word
 489                 q, rprime = z.divW(u, v[0])
 490                 if rprime > 0 {
 491                         r = z2.make(1)
 492                         r[0] = rprime
 493                 } else {
 494                         r = z2.make(0)
 495                 }
 496                 return
 497         }
 498
 499         q, r = z.divLarge(z2, u, v)
 500         return
 501 }
 502
 503 // q = (uIn-r)/v, with 0 <= r < y
 504 // Uses z as storage for q, and u as storage for r if possible.
 505 // See Knuth, Volume 2, section 4.3.1, Algorithm D.
 506 // Preconditions:
 507 //    len(v) >= 2
 508 //    len(uIn) >= len(v)
 509 func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
 510         n := len(v)
 511         m := len(uIn) - n
 512
 513         // determine if z can be reused
 514         // TODO(gri) should find a better solution - this if statement
 515         //           is very costly (see e.g. time pidigits -s -n 10000)
 516         if alias(z, uIn) || alias(z, v) {
 517                 z = nil // z is an alias for uIn or v - cannot reuse
 518         }
 519         q = z.make(m + 1)
 520
 521         qhatv := make(nat, n+1)
 522         if alias(u, uIn) || alias(u, v) {
 523                 u = nil // u is an alias for uIn or v - cannot reuse
 524         }
 525         u = u.make(len(uIn) + 1)
 526         u.clear()
 527
 528         // D1.
 529         shift := leadingZeros(v[n-1])
 530         if shift > 0 {
 531                 // do not modify v, it may be used by another goroutine simultaneously
 532                 v1 := make(nat, n)
 533                 shlVU(v1, v, shift)
 534                 v = v1
 535         }
 536         u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift)
 537
 538         // D2.
 539         for j := m; j >= 0; j-- {
 540                 // D3.
 541                 qhat := Word(_M)
 542                 if u[j+n] != v[n-1] {
 543                         var rhat Word
 544                         qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1])
 545
 546                         // x1 | x2 = q̂v_{n-2}
 547                         x1, x2 := mulWW(qhat, v[n-2])
 548                         // test if q̂v_{n-2} > br̂ + u_{j+n-2}
 549                         for greaterThan(x1, x2, rhat, u[j+n-2]) {
 550                                 qhat--
 551                                 prevRhat := rhat
 552                                 rhat += v[n-1]
 553                                 // v[n-1] >= 0, so this tests for overflow.
 554                                 if rhat < prevRhat {
 555                                         break
 556                                 }
 557                                 x1, x2 = mulWW(qhat, v[n-2])
 558                         }
 559                 }
 560
 561                 // D4.
 562                 qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0)
 563
 564                 c := subVV(u[j:j+len(qhatv)], u[j:], qhatv)
 565                 if c != 0 {
 566                         c := addVV(u[j:j+n], u[j:], v)
 567                         u[j+n] += c
 568                         qhat--
 569                 }
 570
 571                 q[j] = qhat
 572         }
 573
 574         q = q.norm()
 575         shrVU(u, u, shift)
 576         r = u.norm()
 577
 578         return q, r
 579 }
 580
 581 // Length of x in bits. x must be normalized.
 582 func (x nat) bitLen() int {
 583         if i := len(x) - 1; i >= 0 {
 584                 return i*_W + bitLen(x[i])
 585         }
 586         return 0
 587 }
 588
 589 // MaxBase is the largest number base accepted for string conversions.
 590 const MaxBase = 'z' - 'a' + 10 + 1 // = hexValue('z') + 1
 591
 592 func hexValue(ch rune) Word {
 593         d := MaxBase + 1 // illegal base
 594         switch {
 595         case '0' <= ch && ch <= '9':
 596                 d = int(ch - '0')
 597         case 'a' <= ch && ch <= 'z':
 598                 d = int(ch - 'a' + 10)
 599         case 'A' <= ch && ch <= 'Z':
 600                 d = int(ch - 'A' + 10)
 601         }
 602         return Word(d)
 603 }
 604
 605 // scan sets z to the natural number corresponding to the longest possible prefix
 606 // read from r representing an unsigned integer in a given conversion base.
 607 // It returns z, the actual conversion base used, and an error, if any. In the
 608 // error case, the value of z is undefined. The syntax follows the syntax of
 609 // unsigned integer literals in Go.
 610 //
 611 // The base argument must be 0 or a value from 2 through MaxBase. If the base
 612 // is 0, the string prefix determines the actual conversion base. A prefix of
 613 // ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a
 614 // ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10.
 615 //
 616 func (z nat) scan(r io.RuneScanner, base int) (nat, int, error) {
 617         // reject illegal bases
 618         if base < 0 || base == 1 || MaxBase < base {
 619                 return z, 0, errors.New("illegal number base")
 620         }
 621
 622         // one char look-ahead
 623         ch, _, err := r.ReadRune()
 624         if err != nil {
 625                 return z, 0, err
 626         }
 627
 628         // determine base if necessary
 629         b := Word(base)
 630         if base == 0 {
 631                 b = 10
 632                 if ch == '0' {
 633                         switch ch, _, err = r.ReadRune(); err {
 634                         case nil:
 635                                 b = 8
 636                                 switch ch {
 637                                 case 'x', 'X':
 638                                         b = 16
 639                                 case 'b', 'B':
 640                                         b = 2
 641                                 }
 642                                 if b == 2 || b == 16 {
 643                                         if ch, _, err = r.ReadRune(); err != nil {
 644                                                 return z, 0, err
 645                                         }
 646                                 }
 647                         case io.EOF:
 648                                 return z.make(0), 10, nil
 649                         default:
 650                                 return z, 10, err
 651                         }
 652                 }
 653         }
 654
 655         // convert string
 656         // - group as many digits d as possible together into a "super-digit" dd with "super-base" bb
 657         // - only when bb does not fit into a word anymore, do a full number mulAddWW using bb and dd
 658         z = z.make(0)
 659         bb := Word(1)
 660         dd := Word(0)
 661         for max := _M / b; ; {
 662                 d := hexValue(ch)
 663                 if d >= b {
 664                         r.UnreadRune() // ch does not belong to number anymore
 665                         break
 666                 }
 667
 668                 if bb <= max {
 669                         bb *= b
 670                         dd = dd*b + d
 671                 } else {
 672                         // bb * b would overflow
 673                         z = z.mulAddWW(z, bb, dd)
 674                         bb = b
 675                         dd = d
 676                 }
 677
 678                 if ch, _, err = r.ReadRune(); err != nil {
 679                         if err != io.EOF {
 680                                 return z, int(b), err
 681                         }
 682                         break
 683                 }
 684         }
 685
 686         switch {
 687         case bb > 1:
 688                 // there was at least one mantissa digit
 689                 z = z.mulAddWW(z, bb, dd)
 690         case base == 0 && b == 8:
 691                 // there was only the octal prefix 0 (possibly followed by digits > 7);
 692                 // return base 10, not 8
 693                 return z, 10, nil
 694         case base != 0 || b != 8:
 695                 // there was neither a mantissa digit nor the octal prefix 0
 696                 return z, int(b), errors.New("syntax error scanning number")
 697         }
 698
 699         return z.norm(), int(b), nil
 700 }
 701
 702 // Character sets for string conversion.
 703 const (
 704         lowercaseDigits = "0123456789abcdefghijklmnopqrstuvwxyz"
 705         uppercaseDigits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 706 )
 707
 708 // decimalString returns a decimal representation of x.
 709 // It calls x.string with the charset "0123456789".
 710 func (x nat) decimalString() string {
 711         return x.string(lowercaseDigits[0:10])
 712 }
 713
 714 // string converts x to a string using digits from a charset; a digit with
 715 // value d is represented by charset[d]. The conversion base is determined
 716 // by len(charset), which must be >= 2.
 717 func (x nat) string(charset string) string {
 718         b := Word(len(charset))
 719
 720         // special cases
 721         switch {
 722         case b < 2 || b > 256:
 723                 panic("illegal base")
 724         case len(x) == 0:
 725                 return string(charset[0])
 726         }
 727
 728         // allocate buffer for conversion
 729         i := x.bitLen()/log2(b) + 1 // +1: round up
 730         s := make([]byte, i)
 731
 732         // special case: power of two bases can avoid divisions completely
 733         if b == b&-b {
 734                 // shift is base-b digit size in bits
 735                 shift := uint(trailingZeroBits(b)) // shift > 0 because b >= 2
 736                 mask := Word(1)<<shift - 1
 737                 w := x[0]
 738                 nbits := uint(_W) // number of unprocessed bits in w
 739
 740                 // convert less-significant words
 741                 for k := 1; k < len(x); k++ {
 742                         // convert full digits
 743                         for nbits >= shift {
 744                                 i--
 745                                 s[i] = charset[w&mask]
 746                                 w >>= shift
 747                                 nbits -= shift
 748                         }
 749
 750                         // convert any partial leading digit and advance to next word
 751                         if nbits == 0 {
 752                                 // no partial digit remaining, just advance
 753                                 w = x[k]
 754                                 nbits = _W
 755                         } else {
 756                                 // partial digit in current (k-1) and next (k) word
 757                                 w |= x[k] << nbits
 758                                 i--
 759                                 s[i] = charset[w&mask]
 760
 761                                 // advance
 762                                 w = x[k] >> (shift - nbits)
 763                                 nbits = _W - (shift - nbits)
 764                         }
 765                 }
 766
 767                 // convert digits of most-significant word (omit leading zeros)
 768                 for nbits >= 0 && w != 0 {
 769                         i--
 770                         s[i] = charset[w&mask]
 771                         w >>= shift
 772                         nbits -= shift
 773                 }
 774
 775                 return string(s[i:])
 776         }
 777
 778         // general case: extract groups of digits by multiprecision division
 779
 780         // maximize ndigits where b**ndigits < 2^_W; bb (big base) is b**ndigits
 781         bb := Word(1)
 782         ndigits := 0
 783         for max := Word(_M / b); bb <= max; bb *= b {
 784                 ndigits++
 785         }
 786
 787         // preserve x, create local copy for use in repeated divisions
 788         q := nat{}.set(x)
 789         var r Word
 790
 791         // convert
 792         if b == 10 { // hard-coding for 10 here speeds this up by 1.25x
 793                 for len(q) > 0 {
 794                         // extract least significant, base bb "digit"
 795                         q, r = q.divW(q, bb) // N.B. >82% of time is here. Optimize divW
 796                         if len(q) == 0 {
 797                                 // skip leading zeros in most-significant group of digits
 798                                 for j := 0; j < ndigits && r != 0; j++ {
 799                                         i--
 800                                         s[i] = charset[r%10]
 801                                         r /= 10
 802                                 }
 803                         } else {
 804                                 for j := 0; j < ndigits; j++ {
 805                                         i--
 806                                         s[i] = charset[r%10]
 807                                         r /= 10
 808                                 }
 809                         }
 810                 }
 811         } else {
 812                 for len(q) > 0 {
 813                         // extract least significant group of digits
 814                         q, r = q.divW(q, bb) // N.B. >82% of time is here. Optimize divW
 815                         if len(q) == 0 {
 816                                 // skip leading zeros in most-significant group of digits
 817                                 for j := 0; j < ndigits && r != 0; j++ {
 818                                         i--
 819                                         s[i] = charset[r%b]
 820                                         r /= b
 821                                 }
 822                         } else {
 823                                 for j := 0; j < ndigits; j++ {
 824                                         i--
 825                                         s[i] = charset[r%b]
 826                                         r /= b
 827                                 }
 828                         }
 829                 }
 830         }
 831
 832         return string(s[i:])
 833 }
 834
 835 const deBruijn32 = 0x077CB531
 836
 837 var deBruijn32Lookup = []byte{
 838         0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
 839         31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
 840 }
 841
 842 const deBruijn64 = 0x03f79d71b4ca8b09
 843
 844 var deBruijn64Lookup = []byte{
 845         0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
 846         62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
 847         63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
 848         54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
 849 }
 850
 851 // trailingZeroBits returns the number of consecutive zero bits on the right
 852 // side of the given Word.
 853 // See Knuth, volume 4, section 7.3.1
 854 func trailingZeroBits(x Word) int {
 855         // x & -x leaves only the right-most bit set in the word. Let k be the
 856         // index of that bit. Since only a single bit is set, the value is two
 857         // to the power of k. Multiplying by a power of two is equivalent to
 858         // left shifting, in this case by k bits.  The de Bruijn constant is
 859         // such that all six bit, consecutive substrings are distinct.
 860         // Therefore, if we have a left shifted version of this constant we can
 861         // find by how many bits it was shifted by looking at which six bit
 862         // substring ended up at the top of the word.
 863         switch _W {
 864         case 32:
 865                 return int(deBruijn32Lookup[((x&-x)*deBruijn32)>>27])
 866         case 64:
 867                 return int(deBruijn64Lookup[((x&-x)*(deBruijn64&_M))>>58])
 868         default:
 869                 panic("Unknown word size")
 870         }
 871
 872         return 0
 873 }
 874
 875 // z = x << s
 876 func (z nat) shl(x nat, s uint) nat {
 877         m := len(x)
 878         if m == 0 {
 879                 return z.make(0)
 880         }
 881         // m > 0
 882
 883         n := m + int(s/_W)
 884         z = z.make(n + 1)
 885         z[n] = shlVU(z[n-m:n], x, s%_W)
 886         z[0 : n-m].clear()
 887
 888         return z.norm()
 889 }
 890
 891 // z = x >> s
 892 func (z nat) shr(x nat, s uint) nat {
 893         m := len(x)
 894         n := m - int(s/_W)
 895         if n <= 0 {
 896                 return z.make(0)
 897         }
 898         // n > 0
 899
 900         z = z.make(n)
 901         shrVU(z, x[m-n:], s%_W)
 902
 903         return z.norm()
 904 }
 905
 906 func (z nat) setBit(x nat, i uint, b uint) nat {
 907         j := int(i / _W)
 908         m := Word(1) << (i % _W)
 909         n := len(x)
 910         switch b {
 911         case 0:
 912                 z = z.make(n)
 913                 copy(z, x)
 914                 if j >= n {
 915                         // no need to grow
 916                         return z
 917                 }
 918                 z[j] &^= m
 919                 return z.norm()
 920         case 1:
 921                 if j >= n {
 922                         n = j + 1
 923                 }
 924                 z = z.make(n)
 925                 copy(z, x)
 926                 z[j] |= m
 927                 // no need to normalize
 928                 return z
 929         }
 930         panic("set bit is not 0 or 1")
 931 }
 932
 933 func (z nat) bit(i uint) uint {
 934         j := int(i / _W)
 935         if j >= len(z) {
 936                 return 0
 937         }
 938         return uint(z[j] >> (i % _W) & 1)
 939 }
 940
 941 func (z nat) and(x, y nat) nat {
 942         m := len(x)
 943         n := len(y)
 944         if m > n {
 945                 m = n
 946         }
 947         // m <= n
 948
 949         z = z.make(m)
 950         for i := 0; i < m; i++ {
 951                 z[i] = x[i] & y[i]
 952         }
 953
 954         return z.norm()
 955 }
 956
 957 func (z nat) andNot(x, y nat) nat {
 958         m := len(x)
 959         n := len(y)
 960         if n > m {
 961                 n = m
 962         }
 963         // m >= n
 964
 965         z = z.make(m)
 966         for i := 0; i < n; i++ {
 967                 z[i] = x[i] &^ y[i]
 968         }
 969         copy(z[n:m], x[n:m])
 970
 971         return z.norm()
 972 }
 973
 974 func (z nat) or(x, y nat) nat {
 975         m := len(x)
 976         n := len(y)
 977         s := x
 978         if m < n {
 979                 n, m = m, n
 980                 s = y
 981         }
 982         // m >= n
 983
 984         z = z.make(m)
 985         for i := 0; i < n; i++ {
 986                 z[i] = x[i] | y[i]
 987         }
 988         copy(z[n:m], s[n:m])
 989
 990         return z.norm()
 991 }
 992
 993 func (z nat) xor(x, y nat) nat {
 994         m := len(x)
 995         n := len(y)
 996         s := x
 997         if m < n {
 998                 n, m = m, n
 999                 s = y
1000         }
1001         // m >= n
1002
1003         z = z.make(m)
1004         for i := 0; i < n; i++ {
1005                 z[i] = x[i] ^ y[i]
1006         }
1007         copy(z[n:m], s[n:m])
1008
1009         return z.norm()
1010 }
1011
1012 // greaterThan returns true iff (x1<<_W + x2) > (y1<<_W + y2)
1013 func greaterThan(x1, x2, y1, y2 Word) bool {
1014         return x1 > y1 || x1 == y1 && x2 > y2
1015 }
1016
1017 // modW returns x % d.
1018 func (x nat) modW(d Word) (r Word) {
1019         // TODO(agl): we don't actually need to store the q value.
1020         var q nat
1021         q = q.make(len(x))
1022         return divWVW(q, 0, x, d)
1023 }
1024
1025 // powersOfTwoDecompose finds q and k with x = q * 1<<k and q is odd, or q and k are 0.
1026 func (x nat) powersOfTwoDecompose() (q nat, k int) {
1027         if len(x) == 0 {
1028                 return x, 0
1029         }
1030
1031         // One of the words must be non-zero by definition,
1032         // so this loop will terminate with i < len(x), and
1033         // i is the number of 0 words.
1034         i := 0
1035         for x[i] == 0 {
1036                 i++
1037         }
1038         n := trailingZeroBits(x[i]) // x[i] != 0
1039
1040         q = make(nat, len(x)-i)
1041         shrVU(q, x[i:], uint(n))
1042
1043         q = q.norm()
1044         k = i*_W + n
1045         return
1046 }
1047
1048 // random creates a random integer in [0..limit), using the space in z if
1049 // possible. n is the bit length of limit.
1050 func (z nat) random(rand *rand.Rand, limit nat, n int) nat {
1051         bitLengthOfMSW := uint(n % _W)
1052         if bitLengthOfMSW == 0 {
1053                 bitLengthOfMSW = _W
1054         }
1055         mask := Word((1 << bitLengthOfMSW) - 1)
1056         z = z.make(len(limit))
1057
1058         for {
1059                 for i := range z {
1060                         switch _W {
1061                         case 32:
1062                                 z[i] = Word(rand.Uint32())
1063                         case 64:
1064                                 z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32
1065                         }
1066                 }
1067
1068                 z[len(limit)-1] &= mask
1069
1070                 if z.cmp(limit) < 0 {
1071                         break
1072                 }
1073         }
1074
1075         return z.norm()
1076 }
1077
1078 // If m != nil, expNN calculates x**y mod m. Otherwise it calculates x**y. It
1079 // reuses the storage of z if possible.
1080 func (z nat) expNN(x, y, m nat) nat {
1081         if alias(z, x) || alias(z, y) {
1082                 // We cannot allow in place modification of x or y.
1083                 z = nil
1084         }
1085
1086         if len(y) == 0 {
1087                 z = z.make(1)
1088                 z[0] = 1
1089                 return z
1090         }
1091
1092         if m != nil {
1093                 // We likely end up being as long as the modulus.
1094                 z = z.make(len(m))
1095         }
1096         z = z.set(x)
1097         v := y[len(y)-1]
1098         // It's invalid for the most significant word to be zero, therefore we
1099         // will find a one bit.
1100         shift := leadingZeros(v) + 1
1101         v <<= shift
1102         var q nat
1103
1104         const mask = 1 << (_W - 1)
1105
1106         // We walk through the bits of the exponent one by one. Each time we
1107         // see a bit, we square, thus doubling the power. If the bit is a one,
1108         // we also multiply by x, thus adding one to the power.
1109
1110         w := _W - int(shift)
1111         for j := 0; j < w; j++ {
1112                 z = z.mul(z, z)
1113
1114                 if v&mask != 0 {
1115                         z = z.mul(z, x)
1116                 }
1117
1118                 if m != nil {
1119                         q, z = q.div(z, z, m)
1120                 }
1121
1122                 v <<= 1
1123         }
1124
1125         for i := len(y) - 2; i >= 0; i-- {
1126                 v = y[i]
1127
1128                 for j := 0; j < _W; j++ {
1129                         z = z.mul(z, z)
1130
1131                         if v&mask != 0 {
1132                                 z = z.mul(z, x)
1133                         }
1134
1135                         if m != nil {
1136                                 q, z = q.div(z, z, m)
1137                         }
1138
1139                         v <<= 1
1140                 }
1141         }
1142
1143         return z
1144 }
1145
1146 // probablyPrime performs reps Miller-Rabin tests to check whether n is prime.
1147 // If it returns true, n is prime with probability 1 - 1/4^reps.
1148 // If it returns false, n is not prime.
1149 func (n nat) probablyPrime(reps int) bool {
1150         if len(n) == 0 {
1151                 return false
1152         }
1153
1154         if len(n) == 1 {
1155                 if n[0] < 2 {
1156                         return false
1157                 }
1158
1159                 if n[0]%2 == 0 {
1160                         return n[0] == 2
1161                 }
1162
1163                 // We have to exclude these cases because we reject all
1164                 // multiples of these numbers below.
1165                 switch n[0] {
1166                 case 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53:
1167                         return true
1168                 }
1169         }
1170
1171         const primesProduct32 = 0xC0CFD797         // Π {p ∈ primes, 2 < p <= 29}
1172         const primesProduct64 = 0xE221F97C30E94E1D // Π {p ∈ primes, 2 < p <= 53}
1173
1174         var r Word
1175         switch _W {
1176         case 32:
1177                 r = n.modW(primesProduct32)
1178         case 64:
1179                 r = n.modW(primesProduct64 & _M)
1180         default:
1181                 panic("Unknown word size")
1182         }
1183
1184         if r%3 == 0 || r%5 == 0 || r%7 == 0 || r%11 == 0 ||
1185                 r%13 == 0 || r%17 == 0 || r%19 == 0 || r%23 == 0 || r%29 == 0 {
1186                 return false
1187         }
1188
1189         if _W == 64 && (r%31 == 0 || r%37 == 0 || r%41 == 0 ||
1190                 r%43 == 0 || r%47 == 0 || r%53 == 0) {
1191                 return false
1192         }
1193
1194         nm1 := nat{}.sub(n, natOne)
1195         // 1<<k * q = nm1;
1196         q, k := nm1.powersOfTwoDecompose()
1197
1198         nm3 := nat{}.sub(nm1, natTwo)
1199         rand := rand.New(rand.NewSource(int64(n[0])))
1200
1201         var x, y, quotient nat
1202         nm3Len := nm3.bitLen()
1203
1204 NextRandom:
1205         for i := 0; i < reps; i++ {
1206                 x = x.random(rand, nm3, nm3Len)
1207                 x = x.add(x, natTwo)
1208                 y = y.expNN(x, q, n)
1209                 if y.cmp(natOne) == 0 || y.cmp(nm1) == 0 {
1210                         continue
1211                 }
1212                 for j := 1; j < k; j++ {
1213                         y = y.mul(y, y)
1214                         quotient, y = quotient.div(y, y, n)
1215                         if y.cmp(nm1) == 0 {
1216                                 continue NextRandom
1217                         }
1218                         if y.cmp(natOne) == 0 {
1219                                 return false
1220                         }
1221                 }
1222                 return false
1223         }
1224
1225         return true
1226 }
1227
1228 // bytes writes the value of z into buf using big-endian encoding.
1229 // len(buf) must be >= len(z)*_S. The value of z is encoded in the
1230 // slice buf[i:]. The number i of unused bytes at the beginning of
1231 // buf is returned as result.
1232 func (z nat) bytes(buf []byte) (i int) {
1233         i = len(buf)
1234         for _, d := range z {
1235                 for j := 0; j < _S; j++ {
1236                         i--
1237                         buf[i] = byte(d)
1238                         d >>= 8
1239                 }
1240         }
1241
1242         for i < len(buf) && buf[i] == 0 {
1243                 i++
1244         }
1245
1246         return
1247 }
1248
1249 // setBytes interprets buf as the bytes of a big-endian unsigned
1250 // integer, sets z to that value, and returns z.
1251 func (z nat) setBytes(buf []byte) nat {
1252         z = z.make((len(buf) + _S - 1) / _S)
1253
1254         k := 0
1255         s := uint(0)
1256         var d Word
1257         for i := len(buf); i > 0; i-- {
1258                 d |= Word(buf[i-1]) << s
1259                 if s += 8; s == _S*8 {
1260                         z[k] = d
1261                         k++
1262                         s = 0
1263                         d = 0
1264                 }
1265         }
1266         if k < len(z) {
1267                 z[k] = d
1268         }
1269
1270         return z.norm()
1271 }