3 * Low level multiprecision arithmetic
5 * (c) 1999 Straylight/Edgeware
8 /*----- Licensing notice --------------------------------------------------*
10 * This file is part of Catacomb.
12 * Catacomb is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Library General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
17 * Catacomb is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Library General Public License for more details.
22 * You should have received a copy of the GNU Library General Public
23 * License along with Catacomb; if not, write to the Free
24 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
28 #ifndef CATACOMB_MPX_H
29 #define CATACOMB_MPX_H
35 /*----- The idea ----------------------------------------------------------*
37 * This file provides functions and macros which work on vectors of words as
38 * unsigned multiprecision integers. The interface works in terms of base
39 * and limit pointers (i.e., a pointer to the start of a vector, and a
40 * pointer just past its end) rather than base pointer and length, because
41 * that requires more arithmetic and state to work on.
43 * The interfaces are slightly bizarre in other ways. Try to use the
44 * higher-level functions where you can: they're rather better designed to
45 * actually be friendly and useful.
48 /*----- Header files ------------------------------------------------------*/
52 #ifndef CATACOMB_MPW_H
56 /*----- General manipulation ----------------------------------------------*/
58 /* --- @MPX_SHRINK@ --- *
60 * Arguments: @const mpw *v@ = pointer to vector of words
61 * @const mpw *vl@ = (updated) current limit of vector
63 * Use: Shrinks down the limit of a multiprecision integer vector.
66 #define MPX_SHRINK(v, vl) do { \
67 const mpw *_vv = (v), *_vvl = (vl); \
68 while (_vvl > _vv && !_vvl[-1]) \
73 /* --- @MPX_BITS@ --- *
75 * Arguments: @unsigned long b@ = result variable
76 * @const mpw *v@ = pointer to array of words
77 * @const mpw *vl@ = limit of vector (from @MPX_SHRINK@)
79 * Use: Calculates the number of bits in a multiprecision value.
82 #define MPX_BITS(b, v, vl) do { \
83 const mpw *_v = (v), *_vl = (vl); \
84 MPX_SHRINK(_v, _vl); \
88 unsigned long _b = MPW_BITS * (_vl - _v - 1) + 1; \
90 unsigned _k = MPW_P2; \
102 /* --- @MPX_OCTETS@ --- *
104 * Arguments: @size_t o@ = result variable
105 * @const mpw *v, *vl@ = pointer to array of words
107 * Use: Calculates the number of octets in a multiprecision value.
110 #define MPX_OCTETS(o, v, vl) do { \
112 MPX_BITS(_bb, (v), (vl)); \
113 (o) = (_bb + 7) >> 3; \
116 /* --- @MPX_OCTETS2C@ --- *
118 * Arguments: @size_t o@ = result variable
119 * @const mpw *v, *vl@ = pointer to array of words
121 * Use: Calculates the number of octets in a multiprecision value, if
122 * you represent it as two's complement.
125 #define MPX_OCTETS2C(o, v, vl) do { \
127 MPX_BITS(_bb, (v), (vl)); \
128 (o) = (_bb >> 3) + 1; \
131 /* --- @MPX_COPY@ --- *
133 * Arguments: @dv, dvl@ = destination vector base and limit
134 * @av, avl@ = source vector base and limit
136 * Use: Copies a multiprecision integer.
139 #define MPX_COPY(dv, dvl, av, avl) do { \
140 mpw *_dv = (dv), *_dvl = (dvl); \
141 size_t _dn = _dvl - _dv; \
142 const mpw *_av = (av), *_avl = (avl); \
143 size_t _an = _avl - _av; \
146 memset(_dv, 0, MPWS(_dn - _an)); \
147 } else if (_an >= _dn) \
148 memmove(_dv, _av, MPWS(_dn)); \
150 memmove(_dv, _av, MPWS(_an)); \
151 memset(_dv + _an, 0, MPWS(_dn - _an)); \
155 /* --- @MPX_ZERO@ --- *
157 * Arguments: @v, vl@ = base and limit of vector to clear
159 * Use: Zeroes the area between the two vector pointers.
162 #define MPX_ZERO(v, vl) do { \
163 mpw *_v = (v), *_vl = (vl); \
165 memset(_v, 0, MPWS(_vl - _v)); \
168 /* --- @MPX_ONE@ --- *
170 * Arguments: @v, vl@ = base and limit of vector to clear
172 * Use: Fills the area between the two vector pointers with ones.
175 #define MPX_ONE(v, vl) do { \
177 const mpw *_vl = (vl); \
182 /*----- Loading and storing -----------------------------------------------*/
184 /* --- @mpx_storel@ --- *
186 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
187 * @void *p@ = pointer to octet array
188 * @size_t sz@ = size of octet array
192 * Use: Stores an MP in an octet array, least significant octet
193 * first. High-end octets are silently discarded if there
194 * isn't enough space for them.
197 extern void mpx_storel(const mpw */*v*/, const mpw */*vl*/,
198 void */*p*/, size_t /*sz*/);
200 /* --- @mpx_loadl@ --- *
202 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
203 * @const void *p@ = pointer to octet array
204 * @size_t sz@ = size of octet array
208 * Use: Loads an MP in an octet array, least significant octet
209 * first. High-end octets are ignored if there isn't enough
213 extern void mpx_loadl(mpw */*v*/, mpw */*vl*/,
214 const void */*p*/, size_t /*sz*/);
216 /* --- @mpx_storeb@ --- *
218 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
219 * @void *p@ = pointer to octet array
220 * @size_t sz@ = size of octet array
224 * Use: Stores an MP in an octet array, most significant octet
225 * first. High-end octets are silently discarded if there
226 * isn't enough space for them.
229 extern void mpx_storeb(const mpw */*v*/, const mpw */*vl*/,
230 void */*p*/, size_t /*sz*/);
232 /* --- @mpx_loadb@ --- *
234 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
235 * @const void *p@ = pointer to octet array
236 * @size_t sz@ = size of octet array
240 * Use: Loads an MP in an octet array, most significant octet
241 * first. High-end octets are ignored if there isn't enough
245 extern void mpx_loadb(mpw */*v*/, mpw */*vl*/,
246 const void */*p*/, size_t /*sz*/);
248 /* --- @mpx_storel2cn@ --- *
250 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
251 * @void *pp@ = pointer to octet array
252 * @size_t sz@ = size of octet array
256 * Use: Stores a negative MP in an octet array, least significant
257 * octet first, as two's complement. High-end octets are
258 * silently discarded if there isn't enough space for them.
259 * This obviously makes the output bad.
262 extern void mpx_storel2cn(const mpw */*v*/, const mpw */*vl*/,
263 void */*p*/, size_t /*sz*/);
265 /* --- @mpx_loadl2cn@ --- *
267 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
268 * @const void *pp@ = pointer to octet array
269 * @size_t sz@ = size of octet array
273 * Use: Loads a negative MP in an octet array, least significant
274 * octet first, as two's complement. High-end octets are
275 * ignored if there isn't enough space for them. This probably
276 * means you made the wrong choice coming here.
279 extern void mpx_loadl2cn(mpw */*v*/, mpw */*vl*/,
280 const void */*p*/, size_t /*sz*/);
282 /* --- @mpx_storeb2cn@ --- *
284 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
285 * @void *pp@ = pointer to octet array
286 * @size_t sz@ = size of octet array
290 * Use: Stores a negative MP in an octet array, most significant
291 * octet first, as two's complement. High-end octets are
292 * silently discarded if there isn't enough space for them,
293 * which probably isn't what you meant.
296 extern void mpx_storeb2cn(const mpw */*v*/, const mpw */*vl*/,
297 void */*p*/, size_t /*sz*/);
299 /* --- @mpx_loadb2cn@ --- *
301 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
302 * @const void *pp@ = pointer to octet array
303 * @size_t sz@ = size of octet array
307 * Use: Loads a negative MP in an octet array, most significant octet
308 * first as two's complement. High-end octets are ignored if
309 * there isn't enough space for them. This probably means you
310 * chose this function wrongly.
313 extern void mpx_loadb2cn(mpw */*v*/, mpw */*vl*/,
314 const void */*p*/, size_t /*sz*/);
317 /*----- Logical shifting --------------------------------------------------*/
319 /* --- @mpx_lsl@ --- *
321 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
322 * @const mpw *av, *avl@ = source vector base and limit
323 * @size_t n@ = number of bit positions to shift by
327 * Use: Performs a logical shift left operation on an integer.
330 extern void mpx_lsl(mpw */*dv*/, mpw */*dvl*/,
331 const mpw */*av*/, const mpw */*avl*/,
334 /* --- @mpx_lslc@ --- *
336 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
337 * @const mpw *av, *avl@ = source vector base and limit
338 * @size_t n@ = number of bit positions to shift by
342 * Use: Performs a logical shift left operation on an integer, only
343 * it fills in the bits with ones instead of zeroes.
346 extern void mpx_lslc(mpw */*dv*/, mpw */*dvl*/,
347 const mpw */*av*/, const mpw */*avl*/,
350 /* --- @mpx_lsr@ --- *
352 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
353 * @const mpw *av, *avl@ = source vector base and limit
354 * @size_t n@ = number of bit positions to shift by
358 * Use: Performs a logical shift right operation on an integer.
361 extern void mpx_lsr(mpw */*dv*/, mpw */*dvl*/,
362 const mpw */*av*/, const mpw */*avl*/,
365 /*----- Bitwise operations ------------------------------------------------*/
367 /* --- @mpx_bitop@ --- *
369 * Arguments: @mpw *dv, *dvl@ = destination vector
370 * @const mpw *av, *avl@ = first source vector
371 * @const mpw *bv, *bvl@ = second source vector
375 * Use: Provide the dyadic boolean functions. The functions are
376 * named after the truth table they generate:
383 #define MPX_DOBIN(what) \
384 what(0000) what(0001) what(0010) what(0011) \
385 what(0100) what(0101) what(0110) what(0111) \
386 what(1000) what(1001) what(1010) what(1011) \
387 what(1100) what(1101) what(1110) what(1111)
389 #define MPX_BITDECL(string) \
390 extern void mpx_bit##string(mpw */*dv*/, mpw */*dvl*/, \
391 const mpw */*av*/, const mpw */*avl*/, \
392 const mpw */*bv*/, const mpw */*bvl*/);
393 MPX_DOBIN(MPX_BITDECL)
395 /* --- @mpx_[n]and@, @mpx_[n]or@, @mpx_xor@ --- *
397 * Synonyms for the commonly-used functions above.
400 #define mpx_and mpx_bit0001
401 #define mpx_or mpx_bit0111
402 #define mpx_nand mpx_bit1110
403 #define mpx_nor mpx_bit1000
404 #define mpx_xor mpx_bit0110
406 /* --- @mpx_not@ --- *
408 * Arguments: @mpw *dv, *dvl@ = destination vector
409 * @const mpw *av, *avl@ = first source vector
416 extern void mpx_not(mpw */*dv*/, mpw */*dvl*/,
417 const mpw */*av*/, const mpw */*avl*/);
419 /*----- Unsigned arithmetic -----------------------------------------------*/
421 /* --- @mpx_2c@ --- *
423 * Arguments: @mpw *dv, *dvl@ = destination vector
424 * @const mpw *v, *vl@ = source vector
428 * Use: Calculates the two's complement of @v@.
431 extern void mpx_2c(mpw */*dv*/, mpw */*dvl*/,
432 const mpw */*v*/, const mpw */*vl*/);
434 /* --- @mpx_ueq@ --- *
436 * Arguments: @const mpw *av, *avl@ = first argument vector base and limit
437 * @const mpw *bv, *bvl@ = second argument vector base and limit
439 * Returns: Nonzero if the two vectors are equal.
441 * Use: Performs an unsigned integer test for equality.
444 extern int mpx_ueq(const mpw */*av*/, const mpw */*avl*/,
445 const mpw */*bv*/, const mpw */*bvl*/);
447 /* --- @mpx_ucmp@ --- *
449 * Arguments: @const mpw *av, *avl@ = first argument vector base and limit
450 * @const mpw *bv, *bvl@ = second argument vector base and limit
452 * Returns: Less than, equal to, or greater than zero depending on
453 * whether @a@ is less than, equal to or greater than @b@,
456 * Use: Performs an unsigned integer comparison.
459 #define MPX_UCMP(av, avl, op, dv, dvl) \
460 (mpx_ucmp((av), (avl), (dv), (dvl)) op 0)
462 extern int mpx_ucmp(const mpw */*av*/, const mpw */*avl*/,
463 const mpw */*bv*/, const mpw */*bvl*/);
465 /* --- @mpx_uadd@ --- *
467 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
468 * @const mpw *av, *avl@ = first addend vector base and limit
469 * @const mpw *bv, *bvl@ = second addend vector base and limit
473 * Use: Performs unsigned integer addition. If the result overflows
474 * the destination vector, high-order bits are discarded. This
475 * means that two's complement addition happens more or less for
476 * free, although that's more a side-effect than anything else.
477 * The result vector may be equal to either or both source
478 * vectors, but may not otherwise overlap them.
481 extern void mpx_uadd(mpw */*dv*/, mpw */*dvl*/,
482 const mpw */*av*/, const mpw */*avl*/,
483 const mpw */*bv*/, const mpw */*bvl*/);
485 /* --- @mpx_uaddn@ --- *
487 * Arguments: @mpw *dv, *dvl@ = source and destination base and limit
488 * @mpw n@ = other addend
492 * Use: Adds a small integer to a multiprecision number.
495 #define MPX_UADDN(dv, dvl, n) do { \
496 mpw *_ddv = (dv), *_ddvl = (dvl); \
499 while (_c && _ddv < _ddvl) { \
500 mpd _x = (mpd)*_ddv + (mpd)_c; \
502 _c = _x >> MPW_BITS; \
506 extern void mpx_uaddn(mpw */*dv*/, mpw */*dvl*/, mpw /*n*/);
508 /* --- @mpx_uaddnlsl@ --- *
510 * Arguments: @mpw *dv, *dvl@ = destination and first argument vector
511 * @mpw a@ = second argument
512 * @unsigned o@ = offset in bits
516 * Use: Computes %$d + 2^o a$%. If the result overflows then
517 * high-order bits are discarded, as usual. We must have
518 * @0 < o < MPW_BITS@.
521 extern void mpx_uaddnlsl(mpw */*dv*/, mpw */*dvl*/,
522 mpw /*a*/, unsigned /*o*/);
524 /* --- @mpx_usub@ --- *
526 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
527 * @const mpw *av, *avl@ = first argument vector base and limit
528 * @const mpw *bv, *bvl@ = second argument vector base and limit
532 * Use: Performs unsigned integer subtraction. If the result
533 * overflows the destination vector, high-order bits are
534 * discarded. This means that two's complement subtraction
535 * happens more or less for free, although that's more a side-
536 * effect than anything else. The result vector may be equal to
537 * either or both source vectors, but may not otherwise overlap
541 extern void mpx_usub(mpw */*dv*/, mpw */*dvl*/,
542 const mpw */*av*/, const mpw */*avl*/,
543 const mpw */*bv*/, const mpw */*bvl*/);
545 /* --- @mpx_usubn@ --- *
547 * Arguments: @mpw *dv, *dvl@ = source and destination base and limit
552 * Use: Subtracts a small integer from a multiprecision number.
555 #define MPX_USUBN(dv, dvl, n) do { \
556 mpw *_ddv = (dv), *_ddvl = (dvl); \
559 while (_ddv < _ddvl) { \
560 mpd _x = (mpd)*_ddv - (mpd)_c; \
562 if (_x >> MPW_BITS) \
569 extern void mpx_usubn(mpw */*dv*/, mpw */*dvl*/, mpw /*n*/);
571 /* --- @mpx_usubnlsl@ --- *
573 * Arguments: @mpw *dv, *dvl@ = destination and first argument vector
574 * @mpw a@ = second argument
575 * @unsigned o@ = offset in bits
579 * Use: Computes %$d - 2^o a$%. If the result overflows then
580 * high-order bits are discarded, as usual, so you get two's
581 * complement. Which might be what you wanted... We must have
582 * @0 < o < MPW_BITS@.
585 extern void mpx_usubnlsl(mpw */*dv*/, mpw */*dvl*/,
586 mpw /*a*/, unsigned /*o*/);
588 /* --- @mpx_umul@ --- *
590 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
591 * @const mpw *av, *avl@ = multiplicand vector base and limit
592 * @const mpw *bv, *bvl@ = multiplier vector base and limit
596 * Use: Performs unsigned integer multiplication. If the result
597 * overflows the desination vector, high-order bits are
598 * discarded. The result vector may not overlap the argument
599 * vectors in any way.
602 extern void mpx_umul(mpw */*dv*/, mpw */*dvl*/,
603 const mpw */*av*/, const mpw */*avl*/,
604 const mpw */*bv*/, const mpw */*bvl*/);
606 /* --- @mpx_umuln@ --- *
608 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
609 * @const mpw *av, *avl@ = multiplicand vector base and limit
610 * @mpw m@ = multiplier
614 * Use: Multiplies a multiprecision integer by a single-word value.
615 * The destination and source may be equal. The destination
616 * is completely cleared after use.
619 #define MPX_UMULN(dv, dvl, av, avl, m) do { \
620 mpw *_dv = (dv), *_dvl = (dvl); \
621 const mpw *_av = (av), *_avl = (avl); \
625 while (_av < _avl) { \
629 _x = (mpd)_m * (mpd)*_av++ + _c; \
631 _c = _x >> MPW_BITS; \
635 MPX_ZERO(_dv, _dvl); \
639 extern void mpx_umuln(mpw */*dv*/, mpw */*dvl*/,
640 const mpw */*av*/, const mpw */*avl*/, mpw m);
642 /* --- @mpx_umlan@ --- *
644 * Arguments: @mpw *dv, *dvl@ = destination/accumulator base and limit
645 * @const mpw *av, *avl@ = multiplicand vector base and limit
646 * @mpw m@ = multiplier
650 * Use: Multiplies a multiprecision integer by a single-word value
651 * and adds the result to an accumulator.
654 #define MPX_UMLAN(dv, dvl, av, avl, m) do { \
655 mpw *_dv = (dv), *_dvl = (dvl); \
656 const mpw *_av = (av), *_avl = (avl); \
660 while (_dv < _dvl && _av < _avl) { \
662 _x = (mpd)*_dv + (mpd)_m * (mpd)*_av++ + _cc; \
664 _cc = _x >> MPW_BITS; \
666 MPX_UADDN(_dv, _dvl, _cc); \
669 extern void mpx_umlan(mpw */*dv*/, mpw */*dvl*/,
670 const mpw */*av*/, const mpw */*avl*/, mpw m);
672 /* --- @mpx_usqr@ --- *
674 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
675 * @const mpw *av, *av@ = source vector base and limit
679 * Use: Performs unsigned integer squaring. The result vector must
680 * not overlap the source vector in any way.
683 extern void mpx_usqr(mpw */*dv*/, mpw */*dvl*/,
684 const mpw */*av*/, const mpw */*avl*/);
686 /* --- @mpx_udiv@ --- *
688 * Arguments: @mpw *qv, *qvl@ = quotient vector base and limit
689 * @mpw *rv, *rvl@ = dividend/remainder vector base and limit
690 * @const mpw *dv, *dvl@ = divisor vector base and limit
691 * @mpw *sv, *svl@ = scratch workspace
695 * Use: Performs unsigned integer division. If the result overflows
696 * the quotient vector, high-order bits are discarded. (Clearly
697 * the remainder vector can't overflow.) The various vectors
698 * may not overlap in any way. Yes, I know it's a bit odd
699 * requiring the dividend to be in the result position but it
700 * does make some sense really. The remainder must have
701 * headroom for at least two extra words. The scratch space
702 * must be at least one word larger than the divisor.
705 extern void mpx_udiv(mpw */*qv*/, mpw */*qvl*/, mpw */*rv*/, mpw */*rvl*/,
706 const mpw */*dv*/, const mpw */*dvl*/,
707 mpw */*sv*/, mpw */*svl*/);
709 /* --- @mpx_udivn@ --- *
711 * Arguments: @mpw *qv, *qvl@ = storage for the quotient (may overlap
713 * @const mpw *rv, *rvl@ = dividend
714 * @mpw d@ = single-precision divisor
716 * Returns: Remainder after divison.
718 * Use: Performs a single-precision division operation.
721 extern mpw mpx_udivn(mpw */*qv*/, mpw */*qvl*/,
722 const mpw */*rv*/, const mpw */*rvl*/, mpw /*d*/);
724 /*----- Karatsuba multiplication algorithms -------------------------------*/
726 /* --- @MPK_THRESH@ --- *
728 * This is the limiting length for using Karatsuba algorithms. It's best to
729 * use the simpler classical multiplication method on numbers smaller than
730 * this. It is unsafe to make this constant less than four (i.e., the
731 * algorithms will fail).
734 #define MPK_THRESH 32
736 /* --- @mpx_kmul@ --- *
738 * Arguments: @mpw *dv, *dvl@ = pointer to destination buffer
739 * @const mpw *av, *avl@ = pointer to first argument
740 * @const mpw *bv, *bvl@ = pointer to second argument
741 * @mpw *sv, *svl@ = pointer to scratch workspace
745 * Use: Multiplies two multiprecision integers using Karatsuba's
746 * algorithm. This is rather faster than traditional long
747 * multiplication (e.g., @mpx_umul@) on large numbers, although
748 * more expensive on small ones.
750 * The destination must be three times as large as the larger
751 * argument. The scratch space must be five times as large as
752 * the larger argument.
755 extern void mpx_kmul(mpw */*dv*/, mpw */*dvl*/,
756 const mpw */*av*/, const mpw */*avl*/,
757 const mpw */*bv*/, const mpw */*bvl*/,
758 mpw */*sv*/, mpw */*svl*/);
760 /* --- @mpx_ksqr@ --- *
762 * Arguments: @mpw *dv, *dvl@ = pointer to destination buffer
763 * @const mpw *av, *avl@ = pointer to first argument
764 * @mpw *sv, *svl@ = pointer to scratch workspace
768 * Use: Squares a multiprecision integers using something similar to
769 * Karatsuba's multiplication algorithm. This is rather faster
770 * than traditional long multiplication (e.g., @mpx_umul@) on
771 * large numbers, although more expensive on small ones, and
772 * rather simpler than full-blown Karatsuba multiplication.
774 * The destination must be three times as large as the larger
775 * argument. The scratch space must be five times as large as
776 * the larger argument.
779 extern void mpx_ksqr(mpw */*dv*/, mpw */*dvl*/,
780 const mpw */*av*/, const mpw */*avl*/,
781 mpw */*sv*/, mpw */*svl*/);
783 /*----- That's all, folks -------------------------------------------------*/