chiark - git - mdw - secnet/blob - scaf.c

   1 /* -*-c-*-
   2  *
   3  * Simple scalar fields
   4  *
   5  * (c) 2017 Straylight/Edgeware
   6  */
   7
   8 /*----- Licensing notice --------------------------------------------------*
   9  *
  10  * This file is part of secnet.
  11  * See README for full list of copyright holders.
  12  *
  13  * secnet is free software; you can redistribute it and/or modify it
  14  * under the terms of the GNU General Public License as published by
  15  * the Free Software Foundation; either version d of the License, or
  16  * (at your option) any later version.
  17  *
  18  * secnet is distributed in the hope that it will be useful, but
  19  * WITHOUT ANY WARRANTY; without even the implied warranty of
  20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21  * General Public License for more details.
  22  *
  23  * You should have received a copy of the GNU General Public License
  24  * version 3 along with secnet; if not, see
  25  * https://www.gnu.org/licenses/gpl.html.
  26  *
  27  * This file was originally part of Catacomb, but has been automatically
  28  * modified for incorporation into secnet: see `import-catacomb-crypto'
  29  * for details.
  30  *
  31  * Catacomb is free software; you can redistribute it and/or modify
  32  * it under the terms of the GNU Library General Public License as
  33  * published by the Free Software Foundation; either version 2 of the
  34  * License, or (at your option) any later version.
  35  *
  36  * Catacomb is distributed in the hope that it will be useful,
  37  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  38  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  39  * GNU Library General Public License for more details.
  40  *
  41  * You should have received a copy of the GNU Library General Public
  42  * License along with Catacomb; if not, write to the Free
  43  * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  44  * MA 02111-1307, USA.
  45  */
  46
  47 /*----- Header files ------------------------------------------------------*/
  48
  49 #include <string.h>
  50
  51 #include "scaf.h"
  52
  53 /*----- Debugging utilties ------------------------------------------------*/
  54
  55 #ifdef SCAF_DEBUG
  56
  57 #include <stdio.h>
  58
  59 #include "mp.h"
  60 #include "mpint.h"
  61 #include "mptext.h"
  62
  63 static void scaf_dump(const char *what, const scaf_piece *x,
  64                       size_t npiece, size_t piecewd)
  65 {
  66   mp *y = MP_ZERO, *t = MP_NEW;
  67   size_t i;
  68   unsigned o = 0;
  69
  70   for (i = 0; i < npiece; i++) {
  71     t = mp_fromuint64(t, x[i]);
  72     t = mp_lsl(t, t, o);
  73     y = mp_add(y, y, t);
  74     o += piecewd;
  75   }
  76   printf(";; %s", what); MP_PRINT("", y); putchar('\n');
  77   mp_drop(y); mp_drop(t);
  78 }
  79
  80 static void scaf_dumpdbl(const char *what, const scaf_dblpiece *x,
  81                       size_t npiece, size_t piecewd)
  82 {
  83   mp *y = MP_ZERO, *t = MP_NEW;
  84   size_t i;
  85   unsigned o = 0;
  86
  87   for (i = 0; i < npiece; i++) {
  88     t = mp_fromuint64(t, x[i]);
  89     t = mp_lsl(t, t, o);
  90     y = mp_add(y, y, t);
  91     o += piecewd;
  92   }
  93   printf(";; %s", what); MP_PRINT("", y); putchar('\n');
  94   mp_drop(y); mp_drop(t);
  95 }
  96
  97 #endif
  98
  99 /*----- Main code ---------------------------------------------------------*/
 100
 101 /* --- @scaf_load@ --- *
 102  *
 103  * Arguments:   @scaf_piece *z@ = where to write the result
 104  *              @const octet *b@ = source buffer to read
 105  *              @size_t sz@ = size of the source buffer
 106  *              @size_t npiece@ = number of pieces to read
 107  *              @unsigned piecewd@ = nominal width of pieces in bits
 108  *
 109  * Returns:     ---
 110  *
 111  * Use:         Loads a little-endian encoded scalar into a vector @z@ of
 112  *              single-precision pieces.
 113  */
 114
 115 void scaf_load(scaf_piece *z, const octet *b, size_t sz,
 116                size_t npiece, unsigned piecewd)
 117 {
 118   uint32 a, m = ((scaf_piece)1 << piecewd) - 1;
 119   unsigned i, j, n;
 120
 121   for (i = j = n = 0, a = 0; i < sz; i++) {
 122     a |= b[i] << n; n += 8;
 123     if (n >= piecewd) {
 124       z[j++] = a&m; a >>= piecewd; n -= piecewd;
 125       if (j >= npiece) return;
 126     }
 127   }
 128   z[j++] = a;
 129   while (j < npiece) z[j++] = 0;
 130 }
 131
 132 /* --- @scaf_loaddbl@ --- *
 133  *
 134  * Arguments:   @scaf_dblpiece *z@ = where to write the result
 135  *              @const octet *b@ = source buffer to read
 136  *              @size_t sz@ = size of the source buffer
 137  *              @size_t npiece@ = number of pieces to read
 138  *              @unsigned piecewd@ = nominal width of pieces in bits
 139  *
 140  * Returns:     ---
 141  *
 142  * Use:         Loads a little-endian encoded scalar into a vector @z@ of
 143  *              double-precision pieces.
 144  */
 145
 146 void scaf_loaddbl(scaf_dblpiece *z, const octet *b, size_t sz,
 147                   size_t npiece, unsigned piecewd)
 148 {
 149   uint32 a, m = ((scaf_piece)1 << piecewd) - 1;
 150   unsigned i, j, n;
 151
 152   for (i = j = n = 0, a = 0; i < sz; i++) {
 153     a |= b[i] << n; n += 8;
 154     if (n >= piecewd) {
 155       z[j++] = a&m; a >>= piecewd; n -= piecewd;
 156       if (j >= npiece) return;
 157     }
 158   }
 159   z[j++] = a;
 160   while (j < npiece) z[j++] = 0;
 161 }
 162
 163 /* --- @scaf_store@ --- *
 164  *
 165  * Arguments:   @octet *b@ = buffer to fill in
 166  *              @size_t sz@ = size of the buffer
 167  *              @const scaf_piece *x@ = scalar to store
 168  *              @size_t npiece@ = number of pieces in @x@
 169  *              @unsigned piecewd@ = nominal width of pieces in bits
 170  *
 171  * Returns:     ---
 172  *
 173  * Use:         Stores a scalar in a vector of single-precison pieces as a
 174  *              little-endian vector of bytes.
 175  */
 176
 177 void scaf_store(octet *b, size_t sz, const scaf_piece *x,
 178                 size_t npiece, unsigned piecewd)
 179 {
 180   uint32 a;
 181   unsigned i, j, n;
 182
 183   for (i = j = n = 0, a = 0; i < npiece; i++) {
 184     a |= x[i] << n; n += piecewd;
 185     while (n >= 8) {
 186       b[j++] = a&0xffu; a >>= 8; n -= 8;
 187       if (j >= sz) return;
 188     }
 189   }
 190   b[j++] = a;
 191   memset(b + j, 0, sz - j);
 192 }
 193
 194 /* --- @scaf_mul@ --- *
 195  *
 196  * Arguments:   @scaf_dblpiece *z@ = where to put the answer
 197  *              @const scaf_piece *x, *y@ = the operands
 198  *              @size_t npiece@ = the length of the operands
 199  *
 200  * Returns:     ---
 201  *
 202  * Use:         Multiply two scalars.  The destination must have space for
 203  *              @2*npiece@ pieces (though the last one will always be zero).
 204  *              The result is not reduced.
 205  */
 206
 207 void scaf_mul(scaf_dblpiece *z, const scaf_piece *x, const scaf_piece *y,
 208               size_t npiece)
 209 {
 210   unsigned i, j;
 211
 212   for (i = 0; i < 2*npiece; i++) z[i] = 0;
 213
 214   for (i = 0; i < npiece; i++)
 215     for (j = 0; j < npiece; j++)
 216       z[i + j] += (scaf_dblpiece)x[i]*y[j];
 217 }
 218
 219 /* --- @scaf_reduce@ --- *
 220  *
 221  * Arguments:   @scaf_piece *z@ = where to write the result
 222  *              @const scaf_dblpiece *x@ = the operand to reduce
 223  *              @const scaf_piece *l@ = the modulus, in internal format
 224  *              @const scaf_piece *mu@ = scaled approximation to @1/l@
 225  *              @size_t npiece@ = number of pieces in @l@
 226  *              @unsigned piecewd@ = nominal width of a piece in bits
 227  *              @scaf_piece *scratch@ = @3*npiece@ scratch pieces
 228  *
 229  * Returns:     ---
 230  *
 231  * Use:         Reduce @x@ (a vector of @2*npiece@ double-precision pieces)
 232  *              modulo @l@ (a vector of @npiece@ single-precision pieces),
 233  *              writing the result to @z@.
 234  *
 235  *              Write @n = npiece@, @w = piecewd@, and %$B = 2^w$%.  The
 236  *              operand @mu@ must contain %$\lfloor B^{2n}/l \rfloor$%, in
 237  *              @npiece + 1@ pieces.  Furthermore, we must have
 238  *              %$3 l < B^n$%.  (Fiddle with %$w$% if necessary.)
 239  */
 240
 241 void scaf_reduce(scaf_piece *z, const scaf_dblpiece *x,
 242                  const scaf_piece *l, const scaf_piece *mu,
 243                  size_t npiece, unsigned piecewd, scaf_piece *scratch)
 244 {
 245   unsigned i, j;
 246   scaf_piece *t = scratch, *q = scratch + 2*npiece;
 247   scaf_piece u, m = ((scaf_piece)1 << piecewd) - 1;
 248   scaf_dblpiece c;
 249
 250   /* This here is the hard part.
 251    *
 252    * Let w = PIECEWD, let n = NPIECE, and let B = 2^w.  We must have
 253    * B^(n-1) <= l < B^n.
 254    *
 255    * The argument MU contains pieces of the quantity µ = floor(B^2n/l), which
 256    * is a scaled approximation to 1/l.  We'll calculate
 257    *
 258    *    q = floor(µ floor(x/B^(n-1))/B^(n+1))
 259    *
 260    * which is an underestimate of x/l.
 261    *
 262    * With a bit more precision: by definition, u - 1 < floor(u) <= u.  Hence,
 263    *
 264    *    B^2n/l - 1 < µ <= B^2/l
 265    *
 266    * and
 267    *
 268    *    x/B^(n-1) - 1 < floor(x/B^(n-1)) <= x/B^(n-1)
 269    *
 270    * Multiplying these together, and dividing through by B^(n+1), gives
 271    *
 272    *    floor(x/l - B^(n-1)/l - x/B^2n + 1/B^(n+1)) <=
 273    *            q <= µ floor(x/B^(n-1))/B^(n+1) <= floor(x/l)
 274    *
 275    * Now, noticing that x < B^2n and l > B^(n-1) shows that x/B^2n and
 276    * B^(n-1)/l are each less than 1; hence
 277    *
 278    *    floor(x/l) - 2 <= q <= floor(x/l) <= x/l
 279    *
 280    * Now we set r = x - q l.  Certainly, r == x (mod l); and
 281    *
 282    *    0 <= r < x - l floor(x/l) + 2 l < 3 l < B^n
 283    */
 284
 285   /* Before we start on the fancy stuff, we need to resolve the pending
 286    * carries in x.  We'll be doing the floor division by just ignoring some
 287    * of the pieces, and it would be bad if we missed some significant bits.
 288    * Of course, this means that we don't actually have to store the low
 289    * NPIECE - 1 pieces of the result.
 290    */
 291   for (i = 0, c = 0; i < 2*npiece; i++)
 292     { c += x[i]; t[i] = c&m; c >>= piecewd; }
 293
 294   /* Now we calculate q.  If we calculate this in product-scanning order, we
 295    * can avoid having to store the low NPIECE + 1 pieces of the product as
 296    * long as we keep track of the carry out properly.  Conveniently, NMU =
 297    * NPIECE + 1, which keeps the loop bounds easy in the first pass.
 298    *
 299    * Furthermore, because we know that r fits in NPIECE pieces, we only need
 300    * the low NPIECE pieces of q.
 301    */
 302   for (i = 0, c = 0; i < npiece + 1; i++) {
 303     for (j = 0; j <= i; j++)
 304       c += (scaf_dblpiece)t[j + npiece - 1]*mu[i - j];
 305     c >>= piecewd;
 306   }
 307   for (i = 0; i < npiece; i++) {
 308     for (j = i + 1; j < npiece + 1; j++)
 309       c += (scaf_dblpiece)t[j + npiece - 1]*mu[npiece + 1 + i - j];
 310     q[i] = c&m; c >>= piecewd;
 311   }
 312
 313   /* Next, we calculate r - q l in z.  Product-scanning seems to be working
 314    * out for us, and this time it will save us needing a large temporary
 315    * space for the product q l as we go.  On the downside, we have to track
 316    * the carries from the multiplication and subtraction separately.
 317    *
 318    * Notice that the result r is at most NPIECE pieces long, so we can stop
 319    * once we have that many.
 320    */
 321   u = 1; c = 0;
 322   for (i = 0; i < npiece; i++) {
 323     for (j = 0; j <= i; j++) c += (scaf_dblpiece)q[j]*l[i - j];
 324     u += t[i] + ((scaf_piece)(c&m) ^ m);
 325     z[i] = u&m; u >>= piecewd; c >>= piecewd;
 326   }
 327
 328   /* Finally, two passes of conditional subtraction.  Calculate t = z - l; if
 329    * there's no borrow out the top, then update z = t; otherwise leave t
 330    * alone.
 331    */
 332   for (i = 0; i < 2; i++) {
 333     for (j = 0, u = 1; j < npiece; j++) {
 334       u += z[j] + (l[j] ^ m);
 335       t[j] = u&m; u >>= piecewd;
 336     }
 337     for (j = 0, u = -u; j < npiece; j++) z[j] = (t[j]&u) | (z[j]&~u);
 338   }
 339 }
 340
 341 /*----- That's all, folks -------------------------------------------------*/