e$B1sF#$H?=$7$^$9!#e(B
Bignum#* e$B$Ke(B FFT e$B$rMQ$$$?>h;;$r<BAu$7$F$_$^$7$?!#e(B
10 e$B?J$Ge(B 100000
e$B7e0J>e$"$k$h$&$J5pBg$J@0?t$N>h;;$,B.$/$J$j$^$9!#e(B
e$B$=$s$J<{MW$O$J$$$G$7$g$&$+!#e(B
$ cat …/test_fft.rb
require “benchmark”
n = 10**10000
puts Benchmark.measure { 100.times { n * n } }
n = 10**100000
puts Benchmark.measure { 10.times { n * n } }
n = 10**1000000
puts Benchmark.measure { 1.times { n * n } }
e$B8=>ue(B
$ ./ruby.org …/test_fft.rb
2.590000 0.000000 2.590000 ( 2.874859)
26.040000 0.010000 26.050000 ( 28.927583)
266.540000 0.040000 266.580000 (276.491797)
FFT e$BHGe(B
$ ./ruby.fft …/test_fft.rb
2.630000 0.010000 2.640000 ( 2.636339)
2.790000 0.040000 2.830000 ( 2.836020)
6.270000 0.090000 6.360000 ( 6.368478)
FFT e$B$N<BAu$O652J=qDL$j$G$9!#4p?t$be(B 65536 (=2**(8sizeof(short)))
e$B$+e(B
256 (=2**(8sizeof(char))) e$B$N7h$aBG$A$G$9!#%U!<%j%(JQ49$H$+$h$/e(B
e$B$o$+$C$F$J$$$N$G!"%P%0$C$F$?$i$9$_$^$;$s!#>$7$$J}$K8!>Z$d:F<BAu$re(B
e$B$7$FD:$1$?$i:G9b$G$9!#e(B
e$B$"$HC/$+!"%K%e!<%H%sK!$K$h$k=|;;$r<BAu$7$^$;$s!)e(B
Index: bignum.c
— bignum.c (revision 14308)
+++ bignum.c (working copy)
@@ -1442,10 +1442,145 @@
}
}
+static void
+big_fft(double *ar, double *ai, int al, double theta)
+{
- int m, mh, i, j, k, irev;
- double wr, wi, xr, xi;
- i = 0;
- for (j = 1; j < al - 1; j++) {
- for (k = al >> 1; k > (i ^= k); k >>= 1);
- if (j < i) {
-
xr = ar[j];
-
xi = ai[j];
-
ar[j] = ar[i];
-
ai[j] = ai[i];
-
ar[i] = xr;
-
ai[i] = xi;
- }
- }
- for (mh = 1; (m = mh << 1) <= al; mh = m) {
- irev = 0;
- for (i = 0; i < al; i += m) {
-
wr = cos(theta * irev);
-
wi = sin(theta * irev);
-
for (k = al >> 2; k > (irev ^= k); k >>= 1);
-
for (j = i; j < mh + i; j++) {
- k = j + mh;
- xr = ar[j] - ar[k];
- xi = ai[j] - ai[k];
- ar[j] += ar[k];
- ai[j] += ai[k];
- ar[k] = wr * xr - wi * xi;
- ai[k] = wr * xi + wi * xr;
-
}
- }
- }
+}
+static void
+big_digits_to_bases(int base, double *xr, long rlen, BDIGIT *xds, long
dlen)
+{
- long i, j, k, div = 1 << (base * CHAR_BIT);
- BDIGIT v;
- k = 0;
- for (i = 0; i < dlen; i++) {
- v = xds[i];
- for (j = 0; j < sizeof(BDIGIT) / base; j++) {
-
xr[k++] = (double) (v % div);
-
v /= div;
- }
- }
- for (; k < rlen; k++) xr[k] = 0;
+}
+static void
+big_bases_to_digits(int base, BDIGIT *zds, long dlen, double *xr)
+{
- long i, j, k, div = 1 << (base * CHAR_BIT);
- BDIGIT v, s;
- double x, xm;
- x = 0;
- i = j = 0;
- v = 0;
- s = 1;
- for (k = 0;; k++) {
- x += floor(xr[k] + 0.5);
- xm = fmod(x, div);
- v |= ((BDIGIT) xm) * s;
- x = (x - xm) / div;
- s *= div;
- if (++j >= sizeof(BDIGIT) / base) {
-
zds[i++] = v;
-
if (i == dlen) return;
-
j = 0;
-
v = 0;
-
s = 1;
- }
- }
+}
static VALUE
+big_mul_fft(VALUE x, VALUE y)
+{
- long i, len, base;
- VALUE z;
- double *xr, *xi, *yr, *yi, pi = atan(1.0) * 4.0;
- /* calculate FFT length and decide the base of radix */
- base = sizeof(short);
- i = (RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1) * sizeof(BDIGIT) / base;
- for (len = 1; len < i; len *= 2);
- if (len > 131072) {
- len *= sizeof(short) / sizeof(char);
- base = sizeof(char);
- }
- /* allocate two arraies of complexes */
- xr = ALLOC_N(double, len);
- xi = ALLOC_N(double, len);
- yr = ALLOC_N(double, len);
- yi = ALLOC_N(double, len);
- /* initialize the arraies */
- big_digits_to_bases(base, xr, len, BDIGITS(x), RBIGNUM_LEN(x));
- big_digits_to_bases(base, yr, len, BDIGITS(y), RBIGNUM_LEN(y));
- for (i = 0; i < len; i++) xi[i] = yi[i] = 0;
- /* perform FFT */
- big_fft(xr, xi, len, 2 * pi / len);
- big_fft(yr, yi, len, 2 * pi / len);
- /* multiply the arraies */
- for (i = 0; i < len; i++) {
- double t = xr[i] * yr[i] - xi[i] * yi[i];
- xi[i] = xr[i] * yi[i] + xi[i] * yr[i];
- xr[i] = t;
- }
- free(yr);
- free(yi);
- /* perform IFFT */
- big_fft(xr, xi, len, -2 * pi / len);
- free(xi);
- for (i = 0; i < len; i++) xr[i] /= len;
- /* make a result bignum */
- len = RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1;
- z = bignew(len, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
- big_bases_to_digits(base, BDIGITS(z), len, xr);
- free(xr);
- return z;
+}
+static VALUE
rb_big_mul0(VALUE x, VALUE y)
{
- long i, j;
-
long i, j, nx, ny, len;
BDIGIT_DBL n = 0;
VALUE z;
BDIGIT *zds;
@@ -1465,6 +1600,22 @@
return rb_num_coerce_bin(x, y);
} -
/* estimate time */
-
for (i = nx = 0; i < RBIGNUM_LEN(x); i++) if (BDIGITS(x)[i]) nx++;
-
for (i = ny = 0; i < RBIGNUM_LEN(y); i++) if (BDIGITS(y)[i]) ny++;
-
for (len = 1; RBIGNUM_LEN(x) + RBIGNUM_LEN(y) > len; len *= 2);
-
/* swap x and y (when y is more sparse than x) */
-
if (ny < nx) {
-
z = x; x = y; y = z; nx = ny;
-
}
-
/* multiply by FFT (if FFT seems to be faster) */
-
if ((double) nx * RBIGNUM_LEN(y) > (double) len * log(len) * 30) {
-
return big_mul_fft(x, y);
-
}
-
/* multiply in a normal way */
j = RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1;
z = bignew(j, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
zds = BDIGITS(z);