Faster Bignum#*

e$B1sF#$H?=$7$^$9!#e(B

Bignum#* e$B$Ke(B FFT e$B$rMQ$$$?>h;;$r<BAu$7$F$_$^$7$?!#e(B
10 e$B?J$Ge(B 100000
e$B7e0J>e$"$k$h$&$J5pBg$J@0?t$N>h;;$,B.$/$J$j$^$9!#e(B
e$B$=$s$J<{MW$O$J$$$G$7$g$&$+!#e(B

$ cat …/test_fft.rb
require “benchmark”

n = 10**10000
puts Benchmark.measure { 100.times { n * n } }

n = 10**100000
puts Benchmark.measure { 10.times { n * n } }

n = 10**1000000
puts Benchmark.measure { 1.times { n * n } }

e$B8=>ue(B

$ ./ruby.org …/test_fft.rb
2.590000 0.000000 2.590000 ( 2.874859)
26.040000 0.010000 26.050000 ( 28.927583)
266.540000 0.040000 266.580000 (276.491797)

FFT e$BHGe(B

$ ./ruby.fft …/test_fft.rb
2.630000 0.010000 2.640000 ( 2.636339)
2.790000 0.040000 2.830000 ( 2.836020)
6.270000 0.090000 6.360000 ( 6.368478)

FFT e$B$N<BAu$O652J=qDL$j$G$9!#4p?t$be(B 65536 (=2**(8sizeof(short)))
e$B$+e(B
256 (=2**(8
sizeof(char))) e$B$N7h$aBG$A$G$9!#%U!<%j%(JQ49$H$+$h$/e(B
e$B$o$+$C$F$J$$$N$G!"%P%0$C$F$?$i$9$_$^$;$s!#>$7$$J}$K8!>Z$d:F<BAu$re(B
e$B$7$FD:$1$?$i:G9b$G$9!#e(B

e$B$"$HC/$+!"%K%e!<%H%sK!$K$h$k=|;;$r<BAu$7$^$;$s!)e(B

Index: bignum.c

— bignum.c (revision 14308)
+++ bignum.c (working copy)
@@ -1442,10 +1442,145 @@
}
}

+static void
+big_fft(double *ar, double *ai, int al, double theta)
+{

  • int m, mh, i, j, k, irev;
  • double wr, wi, xr, xi;
  • i = 0;
  • for (j = 1; j < al - 1; j++) {
  • for (k = al >> 1; k > (i ^= k); k >>= 1);
  • if (j < i) {
  •  xr = ar[j];
    
  •  xi = ai[j];
    
  •  ar[j] = ar[i];
    
  •  ai[j] = ai[i];
    
  •  ar[i] = xr;
    
  •  ai[i] = xi;
    
  • }
  • }
  • for (mh = 1; (m = mh << 1) <= al; mh = m) {
  • irev = 0;
  • for (i = 0; i < al; i += m) {
  •  wr = cos(theta * irev);
    
  •  wi = sin(theta * irev);
    
  •  for (k = al >> 2; k > (irev ^= k); k >>= 1);
    
  •  for (j = i; j < mh + i; j++) {
    
  • k = j + mh;
  • xr = ar[j] - ar[k];
  • xi = ai[j] - ai[k];
  • ar[j] += ar[k];
  • ai[j] += ai[k];
  • ar[k] = wr * xr - wi * xi;
  • ai[k] = wr * xi + wi * xr;
  •  }
    
  • }
  • }
    +}

+static void
+big_digits_to_bases(int base, double *xr, long rlen, BDIGIT *xds, long
dlen)
+{

  • long i, j, k, div = 1 << (base * CHAR_BIT);
  • BDIGIT v;
  • k = 0;
  • for (i = 0; i < dlen; i++) {
  • v = xds[i];
  • for (j = 0; j < sizeof(BDIGIT) / base; j++) {
  •  xr[k++] = (double) (v % div);
    
  •  v /= div;
    
  • }
  • }
  • for (; k < rlen; k++) xr[k] = 0;
    +}

+static void
+big_bases_to_digits(int base, BDIGIT *zds, long dlen, double *xr)
+{

  • long i, j, k, div = 1 << (base * CHAR_BIT);
  • BDIGIT v, s;
  • double x, xm;
  • x = 0;
  • i = j = 0;
  • v = 0;
  • s = 1;
  • for (k = 0;; k++) {
  • x += floor(xr[k] + 0.5);
  • xm = fmod(x, div);
  • v |= ((BDIGIT) xm) * s;
  • x = (x - xm) / div;
  • s *= div;
  • if (++j >= sizeof(BDIGIT) / base) {
  •  zds[i++] = v;
    
  •  if (i == dlen) return;
    
  •  j = 0;
    
  •  v = 0;
    
  •  s = 1;
    
  • }
  • }
    +}

static VALUE
+big_mul_fft(VALUE x, VALUE y)
+{

  • long i, len, base;
  • VALUE z;
  • double *xr, *xi, *yr, *yi, pi = atan(1.0) * 4.0;
  • /* calculate FFT length and decide the base of radix */
  • base = sizeof(short);
  • i = (RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1) * sizeof(BDIGIT) / base;
  • for (len = 1; len < i; len *= 2);
  • if (len > 131072) {
  • len *= sizeof(short) / sizeof(char);
  • base = sizeof(char);
  • }
  • /* allocate two arraies of complexes */
  • xr = ALLOC_N(double, len);
  • xi = ALLOC_N(double, len);
  • yr = ALLOC_N(double, len);
  • yi = ALLOC_N(double, len);
  • /* initialize the arraies */
  • big_digits_to_bases(base, xr, len, BDIGITS(x), RBIGNUM_LEN(x));
  • big_digits_to_bases(base, yr, len, BDIGITS(y), RBIGNUM_LEN(y));
  • for (i = 0; i < len; i++) xi[i] = yi[i] = 0;
  • /* perform FFT */
  • big_fft(xr, xi, len, 2 * pi / len);
  • big_fft(yr, yi, len, 2 * pi / len);
  • /* multiply the arraies */
  • for (i = 0; i < len; i++) {
  • double t = xr[i] * yr[i] - xi[i] * yi[i];
  • xi[i] = xr[i] * yi[i] + xi[i] * yr[i];
  • xr[i] = t;
  • }
  • free(yr);
  • free(yi);
  • /* perform IFFT */
  • big_fft(xr, xi, len, -2 * pi / len);
  • free(xi);
  • for (i = 0; i < len; i++) xr[i] /= len;
  • /* make a result bignum */
  • len = RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1;
  • z = bignew(len, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
  • big_bases_to_digits(base, BDIGITS(z), len, xr);
  • free(xr);
  • return z;
    +}

+static VALUE
rb_big_mul0(VALUE x, VALUE y)
{

  • long i, j;
  • long i, j, nx, ny, len;
    BDIGIT_DBL n = 0;
    VALUE z;
    BDIGIT *zds;
    @@ -1465,6 +1600,22 @@
    return rb_num_coerce_bin(x, y);
    }

  • /* estimate time */

  • for (i = nx = 0; i < RBIGNUM_LEN(x); i++) if (BDIGITS(x)[i]) nx++;

  • for (i = ny = 0; i < RBIGNUM_LEN(y); i++) if (BDIGITS(y)[i]) ny++;

  • for (len = 1; RBIGNUM_LEN(x) + RBIGNUM_LEN(y) > len; len *= 2);

  • /* swap x and y (when y is more sparse than x) */

  • if (ny < nx) {

  • z = x; x = y; y = z; nx = ny;

  • }

  • /* multiply by FFT (if FFT seems to be faster) */

  • if ((double) nx * RBIGNUM_LEN(y) > (double) len * log(len) * 30) {

  • return big_mul_fft(x, y);

  • }

  • /* multiply in a normal way */
    j = RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1;
    z = bignew(j, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
    zds = BDIGITS(z);

e$B$^$D$b$He(B e$B$f$-$R$m$G$9e(B

In message “Re: [ruby-dev:32629] faster Bignum#*”
on Wed, 19 Dec 2007 00:18:17 +0900, “Yusuke ENDOH” [email protected]
writes:

|Bignum#* e$B$Ke(B FFT e$B$rMQ$$$?>h;;$r<BAu$7$F$_$^$7$?!#e(B
|10 e$B?J$Ge(B 100000 e$B7e0J>e$"$k$h$&$J5pBg$J@0?t$N>h;;$,B.$/$J$j$^$9!#e(B
|e$B$=$s$J<{MW$O$J$$$G$7$g$&$+!#e(B

e$B$&!<$s!“$”$k$N$+$J!#%/%j%9%^%9$,=*$o$C$F$+$i<h$j9~$`J}8~$G9Me(B
e$B$($^$9!#e(B

e$B$^$D$b$He(B e$B$f$-$R$m$G$9e(B

In message “Re: [ruby-dev:32632] Re: faster Bignum#*”
on Wed, 19 Dec 2007 12:49:02 +0900, “Yusuke ENDOH” [email protected]
writes:

|e$BB?G\D9@0?t1i;;$O1_<~N($N7W;;$H$+%i%$%U%2!<%`$H$+$K;H$($k$N$G!“2x$7$$e(B
|e$B<{MW$O$?$^$K$”$j$^$9e(B (e$B>Pe(B)

e$B$J$k$[$I!#e(Bw

|e$B$H$j$"$($:!"5pBg$J@0?t$N>h;;$de(B to_s e$B$r$7$?$H$-e(B Ctrl-C e$B$,$-$+$J$/$J$ke(B
|e$B$N$O$J$s$H$+$7$H$$$?$i4r$7$$$H;W$$$^$9!#e(B
|e$B$=$3$GBg$-$$?t$Ne(B rb_big_mul0 e$B$He(B divrem e$B$re(B rb_thread_blocking_region e$B$Ge(B
|e$B<B9T$9$k%Q%C%A$r=q$$$F$_$^$7$?!#$I$&$G$7$g$&$+!#e(B

e$B;d$O<h$j9~$_$?$$$N$G$9$,!"$3$N%Q%C%A$N5;=QE*BEEv@-$,H=CG$G$-e(B
e$B$^$;$s!#$5$5$@$/$s$,e(BOKe$B$H8@$C$?$i<h$j9~$`$H$$$&$3$H$G!#e(B

e$B1sF#$G$9!#e(B

07/12/19 e$B$Ke(B Yukihiro M.[email protected]
e$B$5$s$O=q$-$^$7$?e(B:

e$B$($^$9!#e(B
e$BB?G\D9@0?t1i;;$O1_<~N($N7W;;$H$+%i%$%U%2!<%`$H$+$K;H$($k$N$G!“2x$7$$e(B
e$B<{MW$O$?$^$K$”$j$^$9e(B (e$B>Pe(B)

e$B$H$j$"$($:!"5pBg$J@0?t$N>h;;$de(B to_s e$B$r$7$?$H$-e(B Ctrl-C
e$B$,$-$+$J$/$J$ke(B
e$B$N$O$J$s$H$+$7$H$$$?$i4r$7$$$H;W$$$^$9!#e(B
e$B$=$3$GBg$-$$?t$Ne(B rb_big_mul0 e$B$He(B divrem e$B$re(B
rb_thread_blocking_region e$B$Ge(B
e$B<B9T$9$k%Q%C%A$r=q$$$F$_$^$7$?!#$I$&$G$7$g$&$+!#e(B

Index: bignum.c

— bignum.c (revision 14313)
+++ bignum.c (working copy)
@@ -1442,34 +1442,32 @@
}
}

+static void
+rb_big_stop(void *ptr)
+{

  • VALUE stop = (VALUE)ptr;
  • *stop = Qtrue;
    +}

+struct big_mul_struct {

  • VALUE x, y, stop;
    +};

static VALUE
-rb_big_mul0(VALUE x, VALUE y)
+bigmul1(void *ptr)
{

  • struct big_mul_struct bms = (struct big_mul_struct)ptr;
    long i, j;
    BDIGIT_DBL n = 0;
  • VALUE z;
  • VALUE x = bms->x, y = bms->y, z;
    BDIGIT *zds;
  • switch (TYPE(y)) {
  •  case T_FIXNUM:
    
  • y = rb_int2big(FIX2LONG(y));
  • break;
  •  case T_BIGNUM:
    
  • break;
  •  case T_FLOAT:
    
  • return DOUBLE2NUM(rb_big2dbl(x) * RFLOAT_VALUE(y));
  •  default:
    
  • return rb_num_coerce_bin(x, y);
  • }
  • j = RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1;
    z = bignew(j, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
    zds = BDIGITS(z);
    while (j–) zds[j] = 0;
    for (i = 0; i < RBIGNUM_LEN(x); i++) {
  • if (bms->stop) return Qnil;
    BDIGIT_DBL dd = BDIGITS(x)[i];
    if (dd == 0) continue;
    n = 0;
    @@ -1486,6 +1484,42 @@
    return z;
    }

+static VALUE
+rb_big_mul0(VALUE x, VALUE y)
+{

  • struct big_mul_struct bms;
  • VALUE z;
  • switch (TYPE(y)) {
  •  case T_FIXNUM:
    
  • y = rb_int2big(FIX2LONG(y));
  • break;
  •  case T_BIGNUM:
    
  • break;
  •  case T_FLOAT:
    
  • return DOUBLE2NUM(rb_big2dbl(x) * RFLOAT_VALUE(y));
  •  default:
    
  • return rb_num_coerce_bin(x, y);
  • }
  • bms.x = x;
  • bms.y = y;
  • bms.stop = Qfalse;
  • if (RBIGNUM_LEN(x) + RBIGNUM_LEN(y) > 10000) {
  • VALUE stop = Qfalse;
  • z = rb_thread_blocking_region(bigmul1, &bms, rb_big_stop, &bms.stop);
  • }
  • else {
  • z = bigmul1(&bms);
  • }
  • return z;
    +}

/*

  • call-seq:
  • big * other  => Numeric
    

@@ -1499,9 +1533,15 @@
return bignorm(rb_big_mul0(x, y));
}

-static void
-bigdivrem(VALUE x, VALUE y, VALUE *divp, VALUE *modp)
+struct big_div_struct {

  • VALUE x, y, *divp, *modp, stop;
    +};

+static VALUE
+bigdivrem1(void *ptr)
{

  • struct big_div_struct bds = (struct big_div_struct)ptr;

  • VALUE x = bds->x, y = bds->y, *divp = bds->divp, *modp = bds->modp;
    long nx = RBIGNUM_LEN(x), ny = RBIGNUM_LEN(y);
    long i, j;
    VALUE yy, z;
    @@ -1575,6 +1615,7 @@

    j = nx==ny?nx+1:nx;
    do {

  • if (bds->stop) return Qnil;
    if (zds[j] == yds[ny-1]) q = BIGRAD-1;
    else q = (BDIGIT)((BIGUP(zds[j]) + zds[j-1])/yds[ny-1]);
    if (q) {
    @@ -1627,6 +1668,27 @@
    }
    }

+static VALUE
+bigdivrem(VALUE x, VALUE y, VALUE *divp, VALUE *modp)
+{

  • struct big_div_struct bds;
  • VALUE z;
  • bds.x = x;
  • bds.y = y;
  • bds.divp = divp;
  • bds.modp = modp;
  • bds.stop = Qfalse;
  • if (RBIGNUM_LEN(x) > 10000 || RBIGNUM_LEN(y) > 10000) {
  • VALUE stop = Qfalse;
  • z = rb_thread_blocking_region(bigdivrem1, &bds, rb_big_stop,
    &bds.stop);
  • }
  • else {
  • z = bigdivrem1(&bds);
  • }
  • return z;
    +}

static void
bigdivmod(VALUE x, VALUE y, VALUE *divp, VALUE *modp)
{

e$BK-J!$G$9!#e(B

In [ruby-dev:33139]

e$B$=$3$GBg$-$$?t$Ne(B rb_big_mul0 e$B$He(B divrem e$B$re(B rb_thread_blocking_region e$B$Ge(B
e$B<B9T$9$k%Q%C%A$r=q$$$F$_$^$7$?!#$I$&$G$7$g$&$+!#e(B

+rb_big_mul0(VALUE x, VALUE y)

  • if (RBIGNUM_LEN(x) + RBIGNUM_LEN(y) > 10000) {

+bigdivrem(VALUE x, VALUE y, VALUE *divp, VALUE *modp)

  • if (RBIGNUM_LEN(x) > 10000 || RBIGNUM_LEN(y) > 10000) {

e$B!J8=:_;H$C$F$$$k>h;;!"=|;;%"%k%4%j%:%`$N!K7W;;NL$+$i$9$k$He(B
if (nx*ny > 10000) {
e$B$He(B
if ((nx-ny+1)*ny > 10000) {
e$B$NJ}$,$h$5$=$&$J5$$,!#e(B

e$B$H$3$m$Ge(B bigmul1 e$B$r8+$F5$$E$$$?$N$G$9$,e(B

for (i = 0; i < RBIGNUM_LEN(x); i++) {
    ...
    dd = BDIGITS(x)[i];
    ...
    for (j = 0; j < RBIGNUM_LEN(y); j++) {
        BDIGIT_DBL ee = n + (BDIGIT_DBL)dd * BDIGITS(y)[j];

e$B$H%k!<%WCf$Ge(B BDIGITS() e$B$r;H$C$?$^$^$K$J$C$F$^$9$,Kh2se(B
BDIGITS() e$B$Ge(B
RBIGNUM_EMBED_FLAG e$B$N%A%’%C%/$,F~$k$N$G%k!<%WA0$Ke(B
xds = BDIGITS(x);
yds = BDIGITS(y);
e$B$7$H$$$?J}$,$$$$$s$8$c$J$$$G$7$g$&$+!#e(B