Faster Bignum#*

Yusuke_ENDOH · December 18, 2007, 4:19pm

e$B1sF#$H?=$7$^$9!#e(B

Bignum#* e$B$Ke(B FFT e$B$rMQ$$$?>h;;$r<BAu$7$F$_$^$7$?!#e(B
10 e$B?J$Ge(B 100000
e$B7e0J>e$"$k$h$&$J5pBg$J@0?t$N>h;;$,B.$/$J$j$^$9!#e(B
e$B$=$s$J<{MW$O$J$$$G$7$g$&$+!#e(B

$ cat …/test_fft.rb
require “benchmark”

n = 10**10000
puts Benchmark.measure { 100.times { n * n } }

n = 10**100000
puts Benchmark.measure { 10.times { n * n } }

n = 10**1000000
puts Benchmark.measure { 1.times { n * n } }

e$B8=>ue(B

$ ./ruby.org …/test_fft.rb
2.590000 0.000000 2.590000 ( 2.874859)
26.040000 0.010000 26.050000 ( 28.927583)
266.540000 0.040000 266.580000 (276.491797)

FFT e$BHGe(B

$ ./ruby.fft …/test_fft.rb
2.630000 0.010000 2.640000 ( 2.636339)
2.790000 0.040000 2.830000 ( 2.836020)
6.270000 0.090000 6.360000 ( 6.368478)

FFT e$B$N<BAu$O652J=qDL$j$G$9!#4p?t$be(B 65536 (=2**(8sizeof(short)))
e$B$+e(B
256 (=2**(8sizeof(char))) e$B$N7h$aBG$A$G$9!#%U!<%j%(JQ49$H$+$h$/e(B
e$B$o$+$C$F$J$$$N$G!"%P%0$C$F$?$i$9$_$^$;$s!#>$7$$J}$K8!>Z$d:F<BAu$re(B
e$B$7$FD:$1$?$i:G9b$G$9!#e(B

e$B$"$HC/$+!"%K%e!<%H%sK!$K$h$k=|;;$r<BAu$7$^$;$s!)e(B

Index: bignum.c

— bignum.c (revision 14308)
+++ bignum.c (working copy)
@@ -1442,10 +1442,145 @@
}
}

+static void
+big_fft(double *ar, double *ai, int al, double theta)
+{

int m, mh, i, j, k, irev;
double wr, wi, xr, xi;
i = 0;
for (j = 1; j < al - 1; j++) {
for (k = al >> 1; k > (i ^= k); k >>= 1);
if (j < i) {
```
 xr = ar[j];
```
```
 xi = ai[j];
```
```
 ar[j] = ar[i];
```
```
 ai[j] = ai[i];
```
```
 ar[i] = xr;
```
```
 ai[i] = xi;
```
}
}
for (mh = 1; (m = mh << 1) <= al; mh = m) {
irev = 0;
for (i = 0; i < al; i += m) {
```
 wr = cos(theta * irev);
```
```
 wi = sin(theta * irev);
```

 for (k = al >> 2; k > (irev ^= k); k >>= 1);

```
 for (j = i; j < mh + i; j++) {
```
k = j + mh;
xr = ar[j] - ar[k];
xi = ai[j] - ai[k];
ar[j] += ar[k];
ai[j] += ai[k];
ar[k] = wr * xr - wi * xi;
ai[k] = wr * xi + wi * xr;
```
 }
```
}
}
+}

+static void
+big_digits_to_bases(int base, double *xr, long rlen, BDIGIT *xds, long
dlen)
+{

long i, j, k, div = 1 << (base * CHAR_BIT);
BDIGIT v;
k = 0;
for (i = 0; i < dlen; i++) {
v = xds[i];
for (j = 0; j < sizeof(BDIGIT) / base; j++) {
```
 xr[k++] = (double) (v % div);
```
```
 v /= div;
```
}
}
for (; k < rlen; k++) xr[k] = 0;
+}

+static void
+big_bases_to_digits(int base, BDIGIT *zds, long dlen, double *xr)
+{

long i, j, k, div = 1 << (base * CHAR_BIT);
BDIGIT v, s;
double x, xm;
x = 0;
i = j = 0;
v = 0;
s = 1;
for (k = 0;; k++) {
x += floor(xr[k] + 0.5);
xm = fmod(x, div);
v |= ((BDIGIT) xm) * s;
x = (x - xm) / div;
s *= div;
if (++j >= sizeof(BDIGIT) / base) {
```
 zds[i++] = v;
```
```
 if (i == dlen) return;
```
```
 j = 0;
```
```
 v = 0;
```
```
 s = 1;
```
}
}
+}

static VALUE
+big_mul_fft(VALUE x, VALUE y)
+{

long i, len, base;
VALUE z;
double *xr, *xi, *yr, *yi, pi = atan(1.0) * 4.0;
/* calculate FFT length and decide the base of radix */
base = sizeof(short);
i = (RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1) * sizeof(BDIGIT) / base;
for (len = 1; len < i; len *= 2);
if (len > 131072) {
len *= sizeof(short) / sizeof(char);
base = sizeof(char);
}
/* allocate two arraies of complexes */
xr = ALLOC_N(double, len);
xi = ALLOC_N(double, len);
yr = ALLOC_N(double, len);
yi = ALLOC_N(double, len);
/* initialize the arraies */
big_digits_to_bases(base, xr, len, BDIGITS(x), RBIGNUM_LEN(x));
big_digits_to_bases(base, yr, len, BDIGITS(y), RBIGNUM_LEN(y));
for (i = 0; i < len; i++) xi[i] = yi[i] = 0;
/* perform FFT */
big_fft(xr, xi, len, 2 * pi / len);
big_fft(yr, yi, len, 2 * pi / len);
/* multiply the arraies */
for (i = 0; i < len; i++) {
double t = xr[i] * yr[i] - xi[i] * yi[i];
xi[i] = xr[i] * yi[i] + xi[i] * yr[i];
xr[i] = t;
}
free(yr);
free(yi);
/* perform IFFT */
big_fft(xr, xi, len, -2 * pi / len);
free(xi);
for (i = 0; i < len; i++) xr[i] /= len;
/* make a result bignum */
len = RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1;
z = bignew(len, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
big_bases_to_digits(base, BDIGITS(z), len, xr);
free(xr);
return z;
+}

+static VALUE
rb_big_mul0(VALUE x, VALUE y)
{

long i, j;

long i, j, nx, ny, len;
BDIGIT_DBL n = 0;
VALUE z;
BDIGIT *zds;
@@ -1465,6 +1600,22 @@
return rb_num_coerce_bin(x, y);
}
/* estimate time */
for (i = nx = 0; i < RBIGNUM_LEN(x); i++) if (BDIGITS(x)[i]) nx++;
for (i = ny = 0; i < RBIGNUM_LEN(y); i++) if (BDIGITS(y)[i]) ny++;
for (len = 1; RBIGNUM_LEN(x) + RBIGNUM_LEN(y) > len; len *= 2);
/* swap x and y (when y is more sparse than x) */
if (ny < nx) {
z = x; x = y; y = z; nx = ny;
}
/* multiply by FFT (if FFT seems to be faster) */
if ((double) nx * RBIGNUM_LEN(y) > (double) len * log(len) * 30) {
return big_mul_fft(x, y);
}
/* multiply in a normal way */
j = RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1;
z = bignew(j, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
zds = BDIGITS(z);

Yusuke_ENDOH · December 18, 2007, 6:04pm

e$B$^$D$b$He(B e$B$f$-$R$m$G$9e(B

In message “Re: [ruby-dev:32629] faster Bignum#*”
on Wed, 19 Dec 2007 00:18:17 +0900, “Yusuke ENDOH” [email protected]
writes:

|Bignum#* e$B$Ke(B FFT e$B$rMQ$$$?>h;;$r<BAu$7$F$_$^$7$?!#e(B
|10 e$B?J$Ge(B 100000 e$B7e0J>e$"$k$h$&$J5pBg$J@0?t$N>h;;$,B.$/$J$j$^$9!#e(B
|e$B$=$s$J<{MW$O$J$$$G$7$g$&$+!#e(B

e$B$&!<$s!“$”$k$N$+$J!#%/%j%9%^%9$,=*$o$C$F$+$i<h$j9~$`J}8~$G9Me(B
e$B$($^$9!#e(B

Yusuke_ENDOH · December 19, 2007, 6:50am

e$B$^$D$b$He(B e$B$f$-$R$m$G$9e(B

In message “Re: [ruby-dev:32632] Re: faster Bignum#*”
on Wed, 19 Dec 2007 12:49:02 +0900, “Yusuke ENDOH” [email protected]
writes:

|e$BB?G\D9@0?t1i;;$O1_<~N($N7W;;$H$+%i%$%U%2!<%`$H$+$K;H$($k$N$G!“2x$7$$e(B
|e$B<{MW$O$?$^$K$”$j$^$9e(B (e$B>Pe(B)

e$B$J$k$[$I!#e(Bw

|e$B$H$j$"$($:!"5pBg$J@0?t$N>h;;$de(B to_s e$B$r$7$?$H$-e(B Ctrl-C e$B$,$-$+$J$/$J$ke(B
|e$B$N$O$J$s$H$+$7$H$$$?$i4r$7$$$H;W$$$^$9!#e(B
|e$B$=$3$GBg$-$$?t$Ne(B rb_big_mul0 e$B$He(B divrem e$B$re(B rb_thread_blocking_region e$B$Ge(B
|e$B<B9T$9$k%Q%C%A$r=q$$$F$_$^$7$?!#$I$&$G$7$g$&$+!#e(B

e$B;d$O<h$j9~$_$?$$$N$G$9$,!"$3$N%Q%C%A$N5;=QE*BEEv@-$,H=CG$G$-e(B
e$B$^$;$s!#$5$5$@$/$s$,e(BOKe$B$H8@$C$?$i<h$j9~$`$H$$$&$3$H$G!#e(B

Yusuke_ENDOH · December 19, 2007, 4:50am

e$B1sF#$G$9!#e(B

07/12/19 e$B$Ke(B Yukihiro M.[email protected]
e$B$5$s$O=q$-$^$7$?e(B:

e$B$($^$9!#e(B
e$BB?G\D9@0?t1i;;$O1_<~N($N7W;;$H$+%i%$%U%2!<%`$H$+$K;H$($k$N$G!“2x$7$$e(B
e$B<{MW$O$?$^$K$”$j$^$9e(B (e$B>Pe(B)

e$B$H$j$"$($:!"5pBg$J@0?t$N>h;;$de(B to_s e$B$r$7$?$H$-e(B Ctrl-C
e$B$,$-$+$J$/$J$ke(B
e$B$N$O$J$s$H$+$7$H$$$?$i4r$7$$$H;W$$$^$9!#e(B
e$B$=$3$GBg$-$$?t$Ne(B rb_big_mul0 e$B$He(B divrem e$B$re(B
rb_thread_blocking_region e$B$Ge(B
e$B<B9T$9$k%Q%C%A$r=q$$$F$_$^$7$?!#$I$&$G$7$g$&$+!#e(B

Index: bignum.c

— bignum.c (revision 14313)
+++ bignum.c (working copy)
@@ -1442,34 +1442,32 @@
}
}

+static void
+rb_big_stop(void *ptr)
+{

VALUE stop = (VALUE)ptr;
*stop = Qtrue;
+}

+struct big_mul_struct {

VALUE x, y, stop;
+};

static VALUE
-rb_big_mul0(VALUE x, VALUE y)
+bigmul1(void *ptr)
{

struct big_mul_struct bms = (struct big_mul_struct)ptr;
long i, j;
BDIGIT_DBL n = 0;

VALUE z;

VALUE x = bms->x, y = bms->y, z;
BDIGIT *zds;

switch (TYPE(y)) {
```
 case T_FIXNUM:
```
y = rb_int2big(FIX2LONG(y));
break;
```
 case T_BIGNUM:
```
break;
```
 case T_FLOAT:
```
return DOUBLE2NUM(rb_big2dbl(x) * RFLOAT_VALUE(y));
```
 default:
```
return rb_num_coerce_bin(x, y);
}
j = RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1;
z = bignew(j, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
zds = BDIGITS(z);
while (j–) zds[j] = 0;
for (i = 0; i < RBIGNUM_LEN(x); i++) {

if (bms->stop) return Qnil;
BDIGIT_DBL dd = BDIGITS(x)[i];
if (dd == 0) continue;
n = 0;
@@ -1486,6 +1484,42 @@
return z;
}

+static VALUE
+rb_big_mul0(VALUE x, VALUE y)
+{

struct big_mul_struct bms;
VALUE z;
switch (TYPE(y)) {
```
 case T_FIXNUM:
```
y = rb_int2big(FIX2LONG(y));
break;
```
 case T_BIGNUM:
```
break;
```
 case T_FLOAT:
```
return DOUBLE2NUM(rb_big2dbl(x) * RFLOAT_VALUE(y));
```
 default:
```
return rb_num_coerce_bin(x, y);
}
bms.x = x;
bms.y = y;
bms.stop = Qfalse;
if (RBIGNUM_LEN(x) + RBIGNUM_LEN(y) > 10000) {
VALUE stop = Qfalse;
z = rb_thread_blocking_region(bigmul1, &bms, rb_big_stop, &bms.stop);
}
else {
z = bigmul1(&bms);
}
return z;
+}

/*

call-seq:
```
big * other  => Numeric
```

@@ -1499,9 +1533,15 @@
return bignorm(rb_big_mul0(x, y));
}

-static void
-bigdivrem(VALUE x, VALUE y, VALUE *divp, VALUE *modp)
+struct big_div_struct {

VALUE x, y, *divp, *modp, stop;
+};

+static VALUE
+bigdivrem1(void *ptr)
{

struct big_div_struct bds = (struct big_div_struct)ptr;
VALUE x = bds->x, y = bds->y, *divp = bds->divp, *modp = bds->modp;
long nx = RBIGNUM_LEN(x), ny = RBIGNUM_LEN(y);
long i, j;
VALUE yy, z;
@@ -1575,6 +1615,7 @@

j = nx==ny?nx+1:nx;
do {
if (bds->stop) return Qnil;
if (zds[j] == yds[ny-1]) q = BIGRAD-1;
else q = (BDIGIT)((BIGUP(zds[j]) + zds[j-1])/yds[ny-1]);
if (q) {
@@ -1627,6 +1668,27 @@
}
}

+static VALUE
+bigdivrem(VALUE x, VALUE y, VALUE *divp, VALUE *modp)
+{

struct big_div_struct bds;
VALUE z;
bds.x = x;
bds.y = y;
bds.divp = divp;
bds.modp = modp;
bds.stop = Qfalse;
if (RBIGNUM_LEN(x) > 10000 || RBIGNUM_LEN(y) > 10000) {
VALUE stop = Qfalse;
z = rb_thread_blocking_region(bigdivrem1, &bds, rb_big_stop,
&bds.stop);
}
else {
z = bigdivrem1(&bds);
}
return z;
+}

static void
bigdivmod(VALUE x, VALUE y, VALUE *divp, VALUE *modp)
{

Yusuke_ENDOH · September 2, 2008, 10:57am

e$BK-J!$G$9!#e(B

In [ruby-dev:33139]

e$B$=$3$GBg$-$$?t$Ne(B rb_big_mul0 e$B$He(B divrem e$B$re(B rb_thread_blocking_region e$B$Ge(B
e$B<B9T$9$k%Q%C%A$r=q$$$F$_$^$7$?!#$I$&$G$7$g$&$+!#e(B

+rb_big_mul0(VALUE x, VALUE y)
…

if (RBIGNUM_LEN(x) + RBIGNUM_LEN(y) > 10000) {

+bigdivrem(VALUE x, VALUE y, VALUE *divp, VALUE *modp)
…

if (RBIGNUM_LEN(x) > 10000 || RBIGNUM_LEN(y) > 10000) {

e$B!J8=:_;H$C$F$$$k>h;;!"=|;;%"%k%4%j%:%`$N!K7W;;NL$+$i$9$k$He(B
if (nx*ny > 10000) {
e$B$He(B
if ((nx-ny+1)*ny > 10000) {
e$B$NJ}$,$h$5$=$&$J5$$,!#e(B

e$B$H$3$m$Ge(B bigmul1 e$B$r8+$F5$$E$$$?$N$G$9$,e(B

for (i = 0; i < RBIGNUM_LEN(x); i++) {
    ...
    dd = BDIGITS(x)[i];
    ...
    for (j = 0; j < RBIGNUM_LEN(y); j++) {
        BDIGIT_DBL ee = n + (BDIGIT_DBL)dd * BDIGITS(y)[j];

e$B$H%k!<%WCf$Ge(B BDIGITS() e$B$r;H$C$?$^$^$K$J$C$F$^$9$,Kh2se(B
BDIGITS() e$B$Ge(B
RBIGNUM_EMBED_FLAG e$B$N%A%’%C%/$,F~$k$N$G%k!<%WA0$Ke(B
xds = BDIGITS(x);
yds = BDIGITS(y);
e$B$7$H$$$?J}$,$$$$$s$8$c$J$$$G$7$g$&$+!#e(B

Faster Bignum#*

e$B8=>ue(B

FFT e$BHGe(B

e$B$"$HC/$+!"%K%e!<%H%sK!$K$h$k=|;;$r<BAu$7$^$;$s!)e(B

Index: bignum.c

Index: bignum.c

e$B$H%k!<%WCf$Ge(B BDIGITS() e$B$r;H$C$?$^$^$K$J$C$F$^$9$,Kh2se(B BDIGITS() e$B$Ge(B RBIGNUM_EMBED_FLAG e$B$N%A%’%C%/$,F~$k$N$G%k!<%WA0$Ke(B xds = BDIGITS(x); yds = BDIGITS(y); e$B$7$H$$$?J}$,$$$$$s$8$c$J$$$G$7$g$&$+!#e(B

e$B$H%k!<%WCf$Ge(B BDIGITS() e$B$r;H$C$?$^$^$K$J$C$F$^$9$,Kh2se(B
BDIGITS() e$B$Ge(B
RBIGNUM_EMBED_FLAG e$B$N%A%’%C%/$,F~$k$N$G%k!<%WA0$Ke(B
xds = BDIGITS(x);
yds = BDIGITS(y);
e$B$7$H$$$?J}$,$$$$$s$8$c$J$$$G$7$g$&$+!#e(B