[Ecm-commits] r2606 - in trunk: . x86_64/core2

cvs commits ecm-commits at lists.gforge.inria.fr
Thu Feb 12 11:46:27 CET 2015


Author: zimmerma
Date: 2015-02-12 11:46:26 +0100 (Thu, 12 Feb 2015)
New Revision: 2606

Modified:
   trunk/ecm-impl.h
   trunk/ks-multiply.c
   trunk/sp.h
   trunk/spv.c
   trunk/tune.c
   trunk/x86_64/core2/params.h
Log:
re-implement Karatsuba


Modified: trunk/ecm-impl.h
===================================================================
--- trunk/ecm-impl.h	2015-02-11 22:01:17 UTC (rev 2605)
+++ trunk/ecm-impl.h	2015-02-12 10:46:26 UTC (rev 2606)
@@ -66,7 +66,7 @@
 #endif
 extern size_t mpn_mul_lo_threshold[];
 
-#define TUNE_LIST_MUL_N_MAX_SIZE 16
+#define TUNE_LIST_MUL_N_MAX_SIZE 32
 
 #include <stdio.h> /* needed for "FILE *" */
 #include <limits.h>
@@ -487,8 +487,8 @@
 /* ks-multiply.c */
 #define list_mul_n_basecase __ECM(list_mul_n_basecase)
 void list_mul_n_basecase (listz_t, listz_t, listz_t, unsigned int);
-#define list_mul_tc __ECM(list_mul_tc)
-void list_mul_tc (listz_t, listz_t, unsigned int, listz_t, unsigned int);
+#define list_mul_n_karatsuba __ECM(list_mul_n_karatsuba)
+void list_mul_n_karatsuba (listz_t, listz_t, listz_t, unsigned int);
 #define list_mul_n_KS1 __ECM(list_mul_n_KS1)
 void list_mul_n_KS1 (listz_t, listz_t, listz_t, unsigned int);
 #define list_mul_n_KS2 __ECM(list_mul_n_KS2)

Modified: trunk/ks-multiply.c
===================================================================
--- trunk/ks-multiply.c	2015-02-11 22:01:17 UTC (rev 2605)
+++ trunk/ks-multiply.c	2015-02-12 10:46:26 UTC (rev 2606)
@@ -65,6 +65,7 @@
     }
 }
 
+/* R <- A * B where A = A[0] + A[1]*x + ... + A[n-1]*x^(n-1), idem for B */
 void
 list_mul_n_basecase (listz_t R, listz_t A, listz_t B, unsigned int n)
 {
@@ -76,18 +77,6 @@
       return;
     }
 
-  if (n == 2) /* Karatsuba */
-    {
-      mpz_add (R[0], A[0], A[1]);
-      mpz_add (R[2], B[0], B[1]);
-      mpz_mul (R[1], R[0], R[2]);
-      mpz_mul (R[0], A[0], B[0]);
-      mpz_mul (R[2], A[1], B[1]);
-      mpz_sub (R[1], R[1], R[0]);
-      mpz_sub (R[1], R[1], R[2]);
-      return;
-    }
-
   for (i = 0; i < n; i++)
     for (j = 0; j < n; j++)
       {
@@ -98,6 +87,109 @@
       }
 }
 
+static void
+list_mul_n_kara2 (listz_t R, listz_t A, listz_t B)
+{
+  mpz_add (R[0], A[0], A[1]);
+  mpz_add (R[2], B[0], B[1]);
+  mpz_mul (R[1], R[0], R[2]);
+  mpz_mul (R[0], A[0], B[0]);
+  mpz_mul (R[2], A[1], B[1]);
+  mpz_sub (R[1], R[1], R[0]);
+  mpz_sub (R[1], R[1], R[2]);
+}
+
+/* R[0..4] <- A[0..2] * B[0..2] in 7 multiplies */
+static void
+list_mul_n_kara3 (listz_t R, listz_t A, listz_t B, listz_t T)
+{
+  mpz_add (T[0], A[0], A[2]);
+  mpz_add (R[0], B[0], B[2]);
+  mpz_mul (R[2], T[0], R[0]); /* (A0+A2)*(B0+B2) */
+  mpz_mul (R[3], T[0], B[1]); /* (A0+A2)*B1 */
+  mpz_mul (R[4], A[1], R[0]); /* A1*(B0+B2) */
+  mpz_add (R[3], R[3], R[4]); /* (A0+A2)*B1+A1*(B0+B2) */
+  list_mul_n_kara2 (T, A, B);
+  mpz_sub (R[2], R[2], T[0]); /* A0*A2+A2*B0+A2*B2 */
+  mpz_sub (R[3], R[3], T[1]); /* A2*B1+A1*B2 */
+  mpz_add (R[2], R[2], T[2]); /* A0*A2+A2*B0+A2*B2+A1*B1 */
+  mpz_swap (R[0], T[0]);      /* A0*B0 */
+  mpz_swap (R[1], T[1]);      /* A0*B1+A1*B0 */
+  mpz_mul (R[4], A[2], B[2]); /* A2*B2 */
+  mpz_sub (R[2], R[2], R[4]); /* A0*A2+A2*B0+A1*B1 */
+}
+
+/* Assume n >= 2. T is a scratch space of enough entries. */
+static void
+list_mul_n_karatsuba_aux (listz_t R, listz_t A, listz_t B, unsigned int n,
+                          listz_t T)
+{
+  unsigned int h, l;
+
+  if (n == 1)
+    {
+      list_mul_n_basecase (R, A, B, n);
+      return;
+    }
+
+  if (n == 2)
+    {
+      list_mul_n_kara2 (R, A, B);
+      return;
+    }
+
+  if (n == 3)
+    {
+      list_mul_n_kara3 (R, A, B, T);
+      return;
+    }
+
+  h = n / 2;
+  l = n - h;
+  list_add (R, A, A + l, h);
+  list_add (R + l, B, B + l, h);
+  if (h < l)
+    {
+      mpz_set (R[h], A[h]);
+      mpz_set (R[l + h], B[h]);
+    }
+  list_mul_n_karatsuba_aux (T, R, R + l, l, T + 2 * l - 1);
+  list_mul_n_karatsuba_aux (R, A, B, l, T + 2 * l - 1);
+  /* {R,2l-1} = Al * Bl */
+  list_mul_n_karatsuba_aux (R + 2 * l, A + l, B + l, h, T + 2 * l - 1);
+  /* {R+2l,2h-1} = Ah * Bh */
+  /* T will contain Al*Bh+Ah*Bl, it thus suffices to compute its low n-1
+     coefficients */
+  list_sub (T, T, R, n - 1);
+  list_sub (T, T, R + 2 * l, 2 * h - 1);
+  mpz_set_ui (R[2 * l - 1], 0);
+  list_add (R + l, R + l, T, n - 1);
+}
+
+static unsigned int
+list_mul_n_mem (unsigned int n)
+{
+  if (n == 1)
+    return 0;
+  else
+    {
+      unsigned int k = (n + 1) / 2;
+      return 2 * k - 1 + list_mul_n_mem (k);
+    }
+}
+
+void
+list_mul_n_karatsuba (listz_t R, listz_t A, listz_t B, unsigned int n)
+{
+  listz_t T;
+  unsigned int s;
+
+  s = list_mul_n_mem (n);
+  T = init_list (s);
+  list_mul_n_karatsuba_aux (R, A, B, n, T);
+  clear_list (T, s);
+}
+
 /* Classical one-point Kronecker-Schoenhage substitution.
    Notes:
     - this code aligns the coeffs at limb boundaries - if instead we aligned
@@ -280,9 +372,11 @@
      2 : list_mul_n_KS1
      3 : list_mul_n_KS2 */
   best = (n < TUNE_LIST_MUL_N_MAX_SIZE) ? T[n] : 3;
-    
+
   if (best == 0)
     list_mul_n_basecase (R, A, B, n);
+  else if (best == 1)
+    list_mul_n_karatsuba (R, A, B, n);
   else if (best == 2)
     list_mul_n_KS1 (R, A, B, n);
   else

Modified: trunk/sp.h
===================================================================
--- trunk/sp.h	2015-02-11 22:01:17 UTC (rev 2605)
+++ trunk/sp.h	2015-02-12 10:46:26 UTC (rev 2606)
@@ -475,20 +475,15 @@
 
 /* add */
 void spv_add (spv_t, spv_t, spv_t, spv_size_t, sp_t);
-void spv_add_sp (spv_t, spv_t, sp_t, spv_size_t, sp_t);
 
 /* subtract */
-void spv_sub (spv_t, spv_t, spv_t, spv_size_t, sp_t);
-void spv_sub_sp (spv_t, spv_t, sp_t, spv_size_t, sp_t);
 void spv_neg (spv_t, spv_t, spv_size_t, sp_t);
 
 /* pointwise multiplication */
 void spv_pwmul (spv_t, spv_t, spv_t, spv_size_t, sp_t, sp_t);
-void spv_pwmul_rev (spv_t, spv_t, spv_t, spv_size_t, sp_t, sp_t);
 void spv_mul_sp (spv_t, spv_t, sp_t, spv_size_t, sp_t, sp_t);
 
 void spv_random (spv_t, spv_size_t, sp_t);
-int spv_cmp (spv_t, spv_t, spv_size_t);
 
 /* ntt_gfp */
 

Modified: trunk/spv.c
===================================================================
--- trunk/spv.c	2015-02-11 22:01:17 UTC (rev 2605)
+++ trunk/spv.c	2015-02-12 10:46:26 UTC (rev 2606)
@@ -33,6 +33,7 @@
  * other than for temporary variables. Unless otherwise specified, any
  * of the input pointers can be equal. */
 
+#ifdef WANT_ASSERT
 int
 spv_verify (spv_t x, spv_size_t len, const sp_t m)
 {
@@ -52,6 +53,7 @@
   
   return 1;
 }
+#endif
 
 /* r = x */
 void
@@ -98,18 +100,6 @@
   memset (r, 0, len * sizeof (sp_t));
 }
 
-int
-spv_cmp (spv_t x, spv_t y, spv_size_t len)
-{
-  spv_size_t i;
-
-  for (i = 0; i < len; i++)
-    if (x[i] != y[i])
-      return 1;
-
-  return 0;
-}
-
 /* r = x + y */
 void
 spv_add (spv_t r, spv_t x, spv_t y, spv_size_t len, sp_t m)
@@ -123,39 +113,6 @@
     r[i] = sp_add (x[i], y[i], m);
 }
 
-/* r = [x[0] + y, x[1] + y, ... ] */
-void
-spv_add_sp (spv_t r, spv_t x, sp_t c, spv_size_t len, sp_t m)
-{
-  spv_size_t i;
-
-  for (i = 0; i < len; i++)
-    r[i] = sp_add (x[i], c, m);
-}
-
-/* r = x - y */
-void
-spv_sub (spv_t r, spv_t x, spv_t y, spv_size_t len, sp_t m)
-{
-  spv_size_t i;
-  
-  ASSERT (r >= x + len || x >= r);
-  ASSERT (r >= y + len || y >= r);
-   
-  for (i = 0; i < len; i++)
-    r[i] = sp_sub (x[i], y[i], m);
-}
-
-/* r = [x[0] - y, x[1] - y, ... ] */
-void
-spv_sub_sp (spv_t r, spv_t x, sp_t c, spv_size_t len, sp_t m)
-{
-  spv_size_t i;
-
-  for (i = 0; i < len; i++)
-    r[i] = sp_sub (x[i], c, m);
-}
-
 /* r = [-x[0], -x[1], ... ] */
 void
 spv_neg (spv_t r, spv_t x, spv_size_t len, sp_t m)
@@ -324,20 +281,6 @@
     r[i] = sp_mul (x[i], y[i], m, d);
 }
 
-/* Pointwise multiplication, second input is read in reverse
- * r = [x[0] * y[len - 1], x[1] * y[len - 2], ... x[len - 1] * y[0]] */
-void
-spv_pwmul_rev (spv_t r, spv_t x, spv_t y, spv_size_t len, sp_t m, sp_t d)
-{
-  spv_size_t i;
-  
-  ASSERT (r >= x + len || x >= r);
-  ASSERT (r >= y + len || y >= r);
-
-  for (i = 0; i < len; i++)
-    r[i] = sp_mul (x[i], y[len - 1 - i], m, d);
-}
-
 /* dst = src * y */
 void
 spv_mul_sp (spv_t r, spv_t x, sp_t c, spv_size_t len, sp_t m, sp_t d)

Modified: trunk/tune.c
===================================================================
--- trunk/tune.c	2015-02-11 22:01:17 UTC (rev 2605)
+++ trunk/tune.c	2015-02-12 10:46:26 UTC (rev 2606)
@@ -441,6 +441,13 @@
         printf (" basecase:%.2e", st[0]);
       best[n] = 0;
       __k = 1;
+      TUNE_FUNC_LOOP(list_mul_n_karatsuba(z, x, y, n));
+      st[1] = (double) __st / (double) __k;
+      if (tune_verbose)
+        printf (" karatsuba:%.2e", st[1]);
+      if (st[1] < st[0])
+        best[n] = 1;
+      __k = 1;
       TUNE_FUNC_LOOP(list_mul_n_KS1(z, x, y, n));
       st[2] = (double) __st / (double) __k;
       if (tune_verbose)
@@ -459,6 +466,7 @@
         }
       if (tune_verbose)     
         printf (" best:%s\n", (best[n] == 0) ? "basecase"
+                : (best[n] == 1) ? "kara"
                 : (best[n] == 2) ? "KS1" : "KS2");
     }
   printf ("#define LIST_MUL_TABLE {0");

Modified: trunk/x86_64/core2/params.h
===================================================================
--- trunk/x86_64/core2/params.h	2015-02-11 22:01:17 UTC (rev 2605)
+++ trunk/x86_64/core2/params.h	2015-02-12 10:46:26 UTC (rev 2606)
@@ -36,4 +36,4 @@
 
 #endif
 
-#define LIST_MUL_TABLE {0,0,0,0,0,0,0,0,0,0,3,3,3,3,3,3}
+#define LIST_MUL_TABLE {0,0,1,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}



More information about the Ecm-commits mailing list