Commit | Line | Data |
---|---|---|
d03ab969 | 1 | /* -*-c-*- |
d03ab969 | 2 | * |
3 | * Low-level multiprecision arithmetic | |
4 | * | |
5 | * (c) 1999 Straylight/Edgeware | |
6 | */ | |
7 | ||
45c0fd36 | 8 | /*----- Licensing notice --------------------------------------------------* |
d03ab969 | 9 | * |
10 | * This file is part of Catacomb. | |
11 | * | |
12 | * Catacomb is free software; you can redistribute it and/or modify | |
13 | * it under the terms of the GNU Library General Public License as | |
14 | * published by the Free Software Foundation; either version 2 of the | |
15 | * License, or (at your option) any later version. | |
45c0fd36 | 16 | * |
d03ab969 | 17 | * Catacomb is distributed in the hope that it will be useful, |
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | * GNU Library General Public License for more details. | |
45c0fd36 | 21 | * |
d03ab969 | 22 | * You should have received a copy of the GNU Library General Public |
23 | * License along with Catacomb; if not, write to the Free | |
24 | * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, | |
25 | * MA 02111-1307, USA. | |
26 | */ | |
27 | ||
d03ab969 | 28 | /*----- Header files ------------------------------------------------------*/ |
29 | ||
444083ae MW |
30 | #include "config.h" |
31 | ||
c8a2f9ef | 32 | #include <assert.h> |
d03ab969 | 33 | #include <stdio.h> |
34 | #include <stdlib.h> | |
35 | #include <string.h> | |
36 | ||
37 | #include <mLib/bits.h> | |
23bbea75 | 38 | #include <mLib/macros.h> |
d03ab969 | 39 | |
444083ae | 40 | #include "dispatch.h" |
d03ab969 | 41 | #include "mptypes.h" |
42 | #include "mpx.h" | |
75263f25 | 43 | #include "bitops.h" |
d03ab969 | 44 | |
45 | /*----- Loading and storing -----------------------------------------------*/ | |
46 | ||
0c9ebe47 MW |
47 | /* --- These are all variations on a theme --- * |
48 | * | |
49 | * Essentially we want to feed bits into a shift register, @ibits@ bits at a | |
50 | * time, and extract them @obits@ bits at a time whenever there are enough. | |
51 | * Of course, @i@ and @o@ will, in general, be different sizes, and we don't | |
52 | * necessarily know which is larger. | |
53 | * | |
54 | * During an operation, we have a shift register @w@ and a most-recent input | |
55 | * @t@. Together, these hold @bits@ significant bits of input. We arrange | |
56 | * that @bits < ibits + obits <= 2*MPW_BITS@, so we can get away with using | |
57 | * an @mpw@ for both of these quantities. | |
58 | */ | |
59 | ||
/* --- @MPX_GETBITS@ --- *
 *
 * Arguments:   @ibits@ = width of input units, in bits
 *              @obits@ = width of output units, in bits
 *              @iavail@ = condition expression: is input data available?
 *              @getbits@ = function or macro: set argument to next input
 *
 * Use:         Fetches one more input unit into @t@ and adds @ibits@ to
 *              @bits@.  A unit still pending in @t@ (i.e., when
 *              @bits >= ibits@) is first merged into the shift register
 *              @w@ so that it isn't lost.  If @iavail@ is false, control
 *              jumps to the @flush@ label of the enclosing function
 *              instead.
 *
 *              On entry we require @bits < obits@; on exit, we have
 *              @bits < ibits + obits@, and @t@ is live.  The @obits@
 *              argument is not referenced by the expansion.
 */

#define MPX_GETBITS(ibits, obits, iavail, getbits) do {                 \
  if (!iavail) goto flush;                                              \
  if (bits >= ibits) { w |= t << (bits - ibits); }                      \
  getbits(t);                                                           \
  bits += ibits;                                                        \
} while (0)
80 | ||
/* --- @MPX_PUTBITS@ --- *
 *
 * Arguments:   @ibits@ = width of input units, in bits
 *              @obits@ = width of output units, in bits
 *              @oavail@ = condition expression: is output space available?
 *              @putbits@ = function or macro: write its argument to output
 *
 * Use:         Writes one output unit taken from the low end of the shift
 *              register and deducts @obits@ from @bits@.  If @oavail@ is
 *              false, the enclosing function returns immediately.
 *
 *              We require @bits < ibits + obits@, and @t@ only matters
 *              when @bits >= ibits@; @MPX_GETBITS@ arranges both.  Once a
 *              unit involving @t@ has been written, whatever remains of
 *              @t@ is folded into @w@ and @t@ is cleared.
 *
 *              @SHRW@ is a right shift which yields zero when the shift
 *              count is @MPW_BITS@ or more.
 */

#define SHRW(w, b) ((b) < MPW_BITS ? (w) >> (b) : 0)

#define MPX_PUTBITS(ibits, obits, oavail, putbits) do {                 \
  if (!oavail) return;                                                  \
  if (bits < ibits) {                                                   \
    putbits(w);                                                         \
    bits -= obits;                                                      \
    w = SHRW(w, obits);                                                 \
  } else {                                                              \
    putbits(w | (t << (bits - ibits)));                                 \
    bits -= obits;                                                      \
    if (bits >= ibits) { w = SHRW(w, obits) | (t << (bits - ibits)); }  \
    else { w = SHRW(w, obits) | (t >> (ibits - bits)); }                \
    t = 0;                                                              \
  }                                                                     \
} while (0)
112 | ||
/* --- @MPX_LOADSTORE@ --- *
 *
 * Arguments:   @name@ = name of function to create, without @mpx_@ prefix
 *              @wconst@ = qualifiers for @mpw *@ arguments
 *              @oconst@ = qualifiers for octet pointers
 *              @decls@ = additional declarations needed
 *              @ibits@ = width of input units, in bits
 *              @iavail@ = condition expression: is input data available?
 *              @getbits@ = function or macro: set argument to next input
 *              @obits@ = width of output units, in bits
 *              @oavail@ = condition expression: is output space available?
 *              @putbits@ = function or macro: write its argument to output
 *              @fixfinal@ = statements to fix shift register at the end
 *              @clear@ = statements to clear remainder of output
 *
 * Use:         Defines a function @mpx_NAME@ which converts between a
 *              sequence of multiprecision words and a vector of octets.
 *
 *              @ibits@, @iavail@ and @getbits@ are handed to
 *              @MPX_GETBITS@; @obits@, @oavail@ and @putbits@ are handed to
 *              @MPX_PUTBITS@.  The generated function alternately reads
 *              until it has an output unit's worth of bits and writes
 *              until it hasn't.  When the input runs out, any bits left in
 *              the shift register are adjusted by @fixfinal@ and written,
 *              and then @clear@ is run.  If the output fills up first, the
 *              function returns from inside @MPX_PUTBITS@ and neither
 *              @fixfinal@ nor @clear@ is executed.
 *
 *              The following variables are in scope: @v@ and @vl@ are the
 *              current base and limit of the word vector; @p@ and @q@ are
 *              the base and limit of the octet vector; @w@ and @t@ form the
 *              shift register used during the conversion (see commentary
 *              above); and @bits@ tracks the number of live bits in the
 *              shift register.
 *
 *              @EMPTY@ expands to nothing; it stands in for macro arguments
 *              which are not wanted.
 */

#define MPX_LOADSTORE(name, wconst, oconst, decls,                      \
                      ibits, iavail, getbits, obits, oavail, putbits,   \
                      fixfinal, clear)                                  \
                                                                        \
void mpx_##name(wconst mpw *v, wconst mpw *vl,                          \
                oconst void *pp, size_t sz)                             \
{                                                                       \
  mpw t = 0, w = 0;                                                     \
  oconst octet *p = pp, *q = p + sz;                                    \
  int bits = 0;                                                         \
  decls                                                                 \
                                                                        \
  for (;;) {                                                            \
    while (bits < obits) MPX_GETBITS(ibits, obits, iavail, getbits);    \
    while (bits >= obits) MPX_PUTBITS(ibits, obits, oavail, putbits);   \
  }                                                                     \
                                                                        \
flush:                                                                  \
  if (bits) {                                                           \
    fixfinal;                                                           \
    while (bits > 0) MPX_PUTBITS(ibits, obits, oavail, putbits);        \
  }                                                                     \
  clear;                                                                \
}

#define EMPTY
169 | ||
/* --- Macros for @getbits@ and @putbits@ --- *
 *
 * These rely on the variables set up by @MPX_LOADSTORE@.  The @MPW@ forms
 * step @v@ upwards through the word vector.  The octet forms ending in @I@
 * step @p@ upwards from the base of the octet vector; those ending in @D@
 * step @q@ downwards from its limit.
 */

#define GETMPW(t)       do { t = *v++; } while (0)
#define PUTMPW(x)       do { *v++ = MPW(x); } while (0)

#define GETOCTETI(t)    do { t = *p++; } while (0)
#define PUTOCTETD(x)    do { *--q = U8(x); } while (0)

#define PUTOCTETI(x)    do { *p++ = U8(x); } while (0)
#define GETOCTETD(t)    do { t = *--q; } while (0)
180 | ||
/* --- Machinery for two's complement I/O --- *
 *
 * The two's complement conversions negate as they go: each word is
 * complemented and the carry @c@ added in.  @DECL_2CN@ declares @c@ with
 * initial value 1; it remains set only for as long as every word produced
 * has been zero.
 *
 * @FIXFINALW_2CN@ is a @fixfinal@ action for loading.  Unless everything
 * seen so far is zero (@c@ still set, and @w@ and @t@ both zero), it toggles
 * the bits of @t@ from bit 8 upwards: all of them if @bits == 8@, otherwise
 * those below bit @MPW_BITS - bits + 8@.  The @bits == 8@ case is handled
 * separately because the general expression would then shift by @MPW_BITS@.
 * NOTE(review): presumably this pre-inverts the padding above the final
 * octet so that @PUTMPW_2CN@ complements it back to zero -- confirm.
 *
 * @FLUSHO_2CN@ is a @clear@ action for storing: it fills the unwritten
 * octets @p@..@q@ with zero if @c@ is still set, and with 0xff otherwise.
 */

#define DECL_2CN                                                        \
  unsigned c = 1;

#define GETMPW_2CN(t) do {                                              \
  t = MPW(~*v++ + c);                                                   \
  c = c && !t;                                                          \
} while (0)

#define PUTMPW_2CN(t) do {                                              \
  mpw _t = MPW(~(t) + c);                                               \
  c = c && !_t;                                                         \
  *v++ = _t;                                                            \
} while (0)

#define FIXFINALW_2CN do {                                              \
  if (c && !w && !t);                                                   \
  else if (bits == 8) t ^= ~(mpw)0xffu;                                 \
  else t ^= ((mpw)1 << (MPW_BITS - bits + 8)) - 256u;                   \
} while (0)

#define FLUSHO_2CN do {                                                 \
  memset(p, c ? 0 : 0xff, q - p);                                       \
} while (0)
206 | ||
d03ab969 | 207 | /* --- @mpx_storel@ --- * |
208 | * | |
209 | * Arguments: @const mpw *v, *vl@ = base and limit of source vector | |
c8a2f9ef | 210 | * @void *pp@ = pointer to octet array |
d03ab969 | 211 | * @size_t sz@ = size of octet array |
212 | * | |
213 | * Returns: --- | |
214 | * | |
215 | * Use: Stores an MP in an octet array, least significant octet | |
216 | * first. High-end octets are silently discarded if there | |
217 | * isn't enough space for them. | |
218 | */ | |
219 | ||
0c9ebe47 MW |
220 | MPX_LOADSTORE(storel, const, EMPTY, EMPTY, |
221 | MPW_BITS, (v < vl), GETMPW, | |
222 | 8, (p < q), PUTOCTETI, | |
850dc272 | 223 | EMPTY, { memset(p, 0, q - p); }) |
d03ab969 | 224 | |
225 | /* --- @mpx_loadl@ --- * | |
226 | * | |
227 | * Arguments: @mpw *v, *vl@ = base and limit of destination vector | |
c8a2f9ef | 228 | * @const void *pp@ = pointer to octet array |
d03ab969 | 229 | * @size_t sz@ = size of octet array |
230 | * | |
231 | * Returns: --- | |
232 | * | |
233 | * Use: Loads an MP in an octet array, least significant octet | |
234 | * first. High-end octets are ignored if there isn't enough | |
235 | * space for them. | |
236 | */ | |
237 | ||
0c9ebe47 MW |
238 | MPX_LOADSTORE(loadl, EMPTY, const, EMPTY, |
239 | 8, (p < q), GETOCTETI, | |
240 | MPW_BITS, (v < vl), PUTMPW, | |
850dc272 | 241 | EMPTY, { MPX_ZERO(v, vl); }) |
0c9ebe47 | 242 | |
d03ab969 | 243 | |
244 | /* --- @mpx_storeb@ --- * | |
245 | * | |
246 | * Arguments: @const mpw *v, *vl@ = base and limit of source vector | |
c8a2f9ef | 247 | * @void *pp@ = pointer to octet array |
d03ab969 | 248 | * @size_t sz@ = size of octet array |
249 | * | |
250 | * Returns: --- | |
251 | * | |
252 | * Use: Stores an MP in an octet array, most significant octet | |
253 | * first. High-end octets are silently discarded if there | |
254 | * isn't enough space for them. | |
255 | */ | |
256 | ||
0c9ebe47 MW |
257 | MPX_LOADSTORE(storeb, const, EMPTY, EMPTY, |
258 | MPW_BITS, (v < vl), GETMPW, | |
259 | 8, (p < q), PUTOCTETD, | |
850dc272 | 260 | EMPTY, { memset(p, 0, q - p); }) |
d03ab969 | 261 | |
262 | /* --- @mpx_loadb@ --- * | |
263 | * | |
264 | * Arguments: @mpw *v, *vl@ = base and limit of destination vector | |
c8a2f9ef | 265 | * @const void *pp@ = pointer to octet array |
d03ab969 | 266 | * @size_t sz@ = size of octet array |
267 | * | |
268 | * Returns: --- | |
269 | * | |
270 | * Use: Loads an MP in an octet array, most significant octet | |
271 | * first. High-end octets are ignored if there isn't enough | |
272 | * space for them. | |
273 | */ | |
274 | ||
0c9ebe47 MW |
275 | MPX_LOADSTORE(loadb, EMPTY, const, EMPTY, |
276 | 8, (p < q), GETOCTETD, | |
277 | MPW_BITS, (v < vl), PUTMPW, | |
850dc272 | 278 | EMPTY, { MPX_ZERO(v, vl); }) |
d03ab969 | 279 | |
f09e814a | 280 | /* --- @mpx_storel2cn@ --- * |
281 | * | |
282 | * Arguments: @const mpw *v, *vl@ = base and limit of source vector | |
283 | * @void *pp@ = pointer to octet array | |
284 | * @size_t sz@ = size of octet array | |
285 | * | |
286 | * Returns: --- | |
287 | * | |
288 | * Use: Stores a negative MP in an octet array, least significant | |
289 | * octet first, as two's complement. High-end octets are | |
290 | * silently discarded if there isn't enough space for them. | |
291 | * This obviously makes the output bad. | |
292 | */ | |
293 | ||
0c9ebe47 MW |
294 | MPX_LOADSTORE(storel2cn, const, EMPTY, DECL_2CN, |
295 | MPW_BITS, (v < vl), GETMPW_2CN, | |
296 | 8, (p < q), PUTOCTETI, | |
850dc272 | 297 | EMPTY, { FLUSHO_2CN; }) |
f09e814a | 298 | |
299 | /* --- @mpx_loadl2cn@ --- * | |
300 | * | |
301 | * Arguments: @mpw *v, *vl@ = base and limit of destination vector | |
302 | * @const void *pp@ = pointer to octet array | |
303 | * @size_t sz@ = size of octet array | |
304 | * | |
305 | * Returns: --- | |
306 | * | |
307 | * Use: Loads a negative MP in an octet array, least significant | |
308 | * octet first, as two's complement. High-end octets are | |
309 | * ignored if there isn't enough space for them. This probably | |
310 | * means you made the wrong choice coming here. | |
311 | */ | |
312 | ||
0c9ebe47 MW |
313 | MPX_LOADSTORE(loadl2cn, EMPTY, const, DECL_2CN, |
314 | 8, (p < q), GETOCTETI, | |
315 | MPW_BITS, (v < vl), PUTMPW_2CN, | |
850dc272 | 316 | { FIXFINALW_2CN; }, { MPX_ZERO(v, vl); }) |
f09e814a | 317 | |
318 | /* --- @mpx_storeb2cn@ --- * | |
319 | * | |
320 | * Arguments: @const mpw *v, *vl@ = base and limit of source vector | |
321 | * @void *pp@ = pointer to octet array | |
322 | * @size_t sz@ = size of octet array | |
323 | * | |
324 | * Returns: --- | |
325 | * | |
326 | * Use: Stores a negative MP in an octet array, most significant | |
327 | * octet first, as two's complement. High-end octets are | |
328 | * silently discarded if there isn't enough space for them, | |
329 | * which probably isn't what you meant. | |
330 | */ | |
331 | ||
0c9ebe47 MW |
332 | MPX_LOADSTORE(storeb2cn, const, EMPTY, DECL_2CN, |
333 | MPW_BITS, (v < vl), GETMPW_2CN, | |
334 | 8, (p < q), PUTOCTETD, | |
850dc272 | 335 | EMPTY, { FLUSHO_2CN; }) |
f09e814a | 336 | |
337 | /* --- @mpx_loadb2cn@ --- * | |
338 | * | |
339 | * Arguments: @mpw *v, *vl@ = base and limit of destination vector | |
340 | * @const void *pp@ = pointer to octet array | |
341 | * @size_t sz@ = size of octet array | |
342 | * | |
343 | * Returns: --- | |
344 | * | |
345 | * Use: Loads a negative MP in an octet array, most significant octet | |
346 | * first as two's complement. High-end octets are ignored if | |
347 | * there isn't enough space for them. This probably means you | |
348 | * chose this function wrongly. | |
349 | */ | |
350 | ||
0c9ebe47 MW |
351 | MPX_LOADSTORE(loadb2cn, EMPTY, const, DECL_2CN, |
352 | 8, (p < q), GETOCTETD, | |
353 | MPW_BITS, (v < vl), PUTMPW_2CN, | |
850dc272 | 354 | { FIXFINALW_2CN; }, { MPX_ZERO(v, vl); }) |
f09e814a | 355 | |
d03ab969 | 356 | /*----- Logical shifting --------------------------------------------------*/ |
357 | ||
/* --- @MPX_SHIFT1@ --- *
 *
 * Arguments:   @init@ = initial accumulator value
 *              @out@ = expression to store in each output word
 *              @next@ = expression for next accumulator value
 *
 * Use:         Shifts by a single bit position.  The input is walked from
 *              @av@ upwards to @avl@; for each input word, @out@ is stored
 *              in the next output word and then @next@ becomes the new
 *              accumulator.  Within @out@ and @next@, the accumulator is
 *              @w@ and the current input word is @t@.  The walk stops
 *              early if the destination fills up.  If there is still room
 *              once the input is exhausted, the final accumulator is
 *              written and the rest of the destination is cleared with
 *              @MPX_ZERO@.
 *
 *              This is meant for the @shift1@ argument of @MPX_SHIFTOP@,
 *              and uses the variables @dv@, @dvl@, @av@ and @avl@ which
 *              that macro provides.
 */

#define MPX_SHIFT1(init, out, next) do {                                \
  mpw t, w = (init);                                                    \
  while (av < avl) {                                                    \
    if (dv >= dvl) break;                                               \
    t = MPW(*av++);                                                     \
    *dv++ = (out);                                                      \
    w = (next);                                                         \
  }                                                                     \
  if (dv < dvl) { *dv++ = MPW(w); MPX_ZERO(dv, dvl); }                  \
} while (0)
384 | ||
/* --- @MPX_SHIFTW@ --- *
 *
 * Arguments:   @max@ = the maximum shift (in words) which is nontrivial
 *              @clear@ = function (or macro) to clear low-order output words
 *              @copy@ = statement to copy words from input to output
 *
 * Use:         Shifts by a whole number of words.  When the word count
 *              @nw@ is at least @max@, the whole destination @dv@..@dvl@ is
 *              handed to @clear@; otherwise @copy@ is executed.  Note that
 *              no semicolon is supplied after @copy@, so it must be a
 *              complete statement in its own right.
 *
 *              This is meant for the @shiftw@ argument of @MPX_SHIFTOP@,
 *              and uses the variables which that macro provides.
 */

#define MPX_SHIFTW(max, clear, copy) do {                               \
  if (nw >= (max)) clear(dv, dvl);                                      \
  else copy                                                             \
} while (0)
d03ab969 | 404 | |
5ee480b5 MW |
/* --- @MPX_SHIFTOP@ --- *
 *
 * Arguments:   @name@ = name of function to define (without `@mpx_@' prefix)
 *              @shift1@ = statement to shift by a single bit
 *              @shiftw@ = statement to shift by a whole number of words
 *              @shift@ = statement to perform a general shift
 *
 * Use:         Defines the shift function @mpx_NAME@.  Its input is
 *              @av@..@avl@, its output is @dv@..@dvl@, and the shift
 *              distance in bits is @n@.  A zero shift is just
 *              @MPX_COPY@; a shift by one uses @shift1@.  Otherwise @nw@
 *              and @nb@ are set so that @n = nw*MPW_BITS + nb@ with
 *              @nb < MPW_BITS@, and then @shiftw@ is used if @nb@ is zero
 *              and @shift@ if not.
 *
 *              Each statement argument becomes the body of a
 *              @do@ ... @while (0)@ loop, so a @break@ within it abandons
 *              the rest of that statement.
 */

#define MPX_SHIFTOP(name, shift1, shiftw, shift)                        \
                                                                        \
void mpx_##name(mpw *dv, mpw *dvl,                                      \
                const mpw *av, const mpw *avl,                          \
                size_t n)                                               \
{                                                                       \
                                                                        \
  if (n == 0)                                                           \
    MPX_COPY(dv, dvl, av, avl);                                         \
  else if (n == 1)                                                      \
    do shift1 while (0);                                                \
  else {                                                                \
    size_t nw = n/MPW_BITS;                                             \
    unsigned nb = n%MPW_BITS;                                           \
    if (!nb) do shiftw while (0);                                       \
    else do shift while (0);                                            \
  }                                                                     \
}
d03ab969 | 436 | |
5ee480b5 MW |
/* --- @MPX_SHIFT_LEFT@ --- *
 *
 * Arguments:   @name@ = name of function to define (without `@mpx_@' prefix)
 *              @init1@ = initializer for single-bit shift accumulator
 *              @clear@ = function (or macro) to clear low-order output words
 *              @flush@ = expression for low-order nontrivial output word
 *
 * Use:         Defines a left-shift function by way of @MPX_SHIFTOP@,
 *              supplying all three of its statement arguments.
 *
 *              @init1@ is passed to @MPX_SHIFT1@, and @clear@ to
 *              @MPX_SHIFTW@; @clear@ is also used to fill in the low-order
 *              words of the output elsewhere.
 *
 *              The general shift works as follows.  If the destination has
 *              no more than @nw@ words then it is simply @clear@ed.
 *              Otherwise, the input is trimmed to what will fit in the
 *              destination or, if the destination is longer than needed,
 *              the surplus high-order output words are zeroed.  The
 *              input is then scanned from its most significant word
 *              downwards, each output word combining the bits carried in
 *              the accumulator @w@ with the top @nb@ bits of the next
 *              input word.  Finally, @flush@ is stored as the lowest
 *              nontrivial output word -- it may refer to @w@, which then
 *              holds @av[0] << nb@, and to @nr = MPW_BITS - nb@ -- and the
 *              @nw@ words below it are @clear@ed.
 */

#define MPX_SHIFT_LEFT(name, init1, clear, flush)                       \
MPX_SHIFTOP(name, {                                                     \
  MPX_SHIFT1(init1,                                                     \
             w | (t << 1),                                              \
             t >> (MPW_BITS - 1));                                      \
}, {                                                                    \
  MPX_SHIFTW(dvl - dv, clear, {                                         \
    MPX_COPY(dv + nw, dvl, av, avl);                                    \
    clear(dv, dv + nw);                                                 \
  });                                                                   \
}, {                                                                    \
  size_t nr = MPW_BITS - nb;                                            \
  size_t dvn = dvl - dv;                                                \
  size_t avn = avl - av;                                                \
  mpw w;                                                                \
                                                                        \
  if (dvn <= nw) {                                                      \
    clear(dv, dvl);                                                     \
    break;                                                              \
  }                                                                     \
                                                                        \
  if (dvn <= avn + nw) {                                                \
    avl = av + dvn - nw;                                                \
    w = *--avl << nb;                                                   \
  } else {                                                              \
    size_t off = avn + nw + 1;                                          \
    MPX_ZERO(dv + off, dvl);                                            \
    dvl = dv + off;                                                     \
    w = 0;                                                              \
  }                                                                     \
                                                                        \
  while (avl > av) {                                                    \
    mpw t = *--avl;                                                     \
    *--dvl = MPW(w | (t >> nr));                                        \
    w = t << nb;                                                        \
  }                                                                     \
                                                                        \
  *--dvl = MPW(flush);                                                  \
  clear(dv, dvl);                                                       \
})
c8a2f9ef | 498 | |
5ee480b5 MW |
499 | /* --- @mpx_lsl@ --- * |
500 | * | |
501 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
502 | * @const mpw *av, *avl@ = source vector base and limit | |
503 | * @size_t n@ = number of bit positions to shift by | |
504 | * | |
505 | * Returns: --- | |
506 | * | |
507 | * Use: Performs a logical shift left operation on an integer. | |
508 | */ | |
d03ab969 | 509 | |
5ee480b5 | 510 | MPX_SHIFT_LEFT(lsl, 0, MPX_ZERO, w) |
d03ab969 | 511 | |
81578196 | 512 | /* --- @mpx_lslc@ --- * |
513 | * | |
514 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
515 | * @const mpw *av, *avl@ = source vector base and limit | |
516 | * @size_t n@ = number of bit positions to shift by | |
517 | * | |
518 | * Returns: --- | |
519 | * | |
520 | * Use: Performs a logical shift left operation on an integer, only | |
521 | * it fills in the bits with ones instead of zeroes. | |
522 | */ | |
523 | ||
5ee480b5 | 524 | MPX_SHIFT_LEFT(lslc, 1, MPX_ONE, w | (MPW_MAX >> nr)) |
81578196 | 525 | |
d03ab969 | 526 | /* --- @mpx_lsr@ --- * |
527 | * | |
528 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
529 | * @const mpw *av, *avl@ = source vector base and limit | |
530 | * @size_t n@ = number of bit positions to shift by | |
531 | * | |
532 | * Returns: --- | |
533 | * | |
534 | * Use: Performs a logical shift right operation on an integer. | |
535 | */ | |
536 | ||
5ee480b5 MW |
537 | MPX_SHIFTOP(lsr, { |
538 | MPX_SHIFT1(av < avl ? *av++ >> 1 : 0, | |
539 | w | (t << (MPW_BITS - 1)), | |
540 | t >> 1); | |
541 | }, { | |
542 | MPX_SHIFTW(avl - av, MPX_ZERO, | |
543 | { MPX_COPY(dv, dvl, av + nw, avl); }); | |
544 | }, { | |
545 | size_t nr = MPW_BITS - nb; | |
546 | mpw w; | |
547 | ||
85e29c6e MW |
548 | if (nw >= avl - av) |
549 | w = 0; | |
550 | else { | |
551 | av += nw; | |
552 | w = *av++; | |
553 | ||
554 | while (av < avl) { | |
555 | mpw t; | |
556 | if (dv >= dvl) goto done; | |
557 | t = *av++; | |
558 | *dv++ = MPW((w >> nb) | (t << nr)); | |
559 | w = t; | |
560 | } | |
d03ab969 | 561 | } |
85e29c6e | 562 | |
5ee480b5 MW |
563 | if (dv < dvl) { |
564 | *dv++ = MPW(w >> nb); | |
565 | MPX_ZERO(dv, dvl); | |
d03ab969 | 566 | } |
d03ab969 | 567 | done:; |
5ee480b5 | 568 | }) |
d03ab969 | 569 | |
0f32e0f8 | 570 | /*----- Bitwise operations ------------------------------------------------*/ |
571 | ||
f09e814a | 572 | /* --- @mpx_bitop@ --- * |
0f32e0f8 | 573 | * |
574 | * Arguments: @mpw *dv, *dvl@ = destination vector | |
575 | * @const mpw *av, *avl@ = first source vector | |
576 | * @const mpw *bv, *bvl@ = second source vector | |
577 | * | |
578 | * Returns: --- | |
579 | * | |
f09e814a | 580 | * Use; Provides the dyadic boolean functions. |
0f32e0f8 | 581 | */ |
582 | ||
f09e814a | 583 | #define MPX_BITBINOP(string) \ |
0f32e0f8 | 584 | \ |
f09e814a | 585 | void mpx_bit##string(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, \ |
586 | const mpw *bv, const mpw *bvl) \ | |
0f32e0f8 | 587 | { \ |
588 | MPX_SHRINK(av, avl); \ | |
589 | MPX_SHRINK(bv, bvl); \ | |
590 | \ | |
591 | while (dv < dvl) { \ | |
592 | mpw a, b; \ | |
593 | a = (av < avl) ? *av++ : 0; \ | |
594 | b = (bv < bvl) ? *bv++ : 0; \ | |
75263f25 | 595 | *dv++ = B##string(a, b); \ |
23bbea75 | 596 | IGNORE(a); IGNORE(b); \ |
0f32e0f8 | 597 | } \ |
598 | } | |
599 | ||
f09e814a | 600 | MPX_DOBIN(MPX_BITBINOP) |
0f32e0f8 | 601 | |
602 | void mpx_not(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl) | |
603 | { | |
604 | MPX_SHRINK(av, avl); | |
605 | ||
606 | while (dv < dvl) { | |
607 | mpw a; | |
608 | a = (av < avl) ? *av++ : 0; | |
609 | *dv++ = ~a; | |
610 | } | |
611 | } | |
612 | ||
d03ab969 | 613 | /*----- Unsigned arithmetic -----------------------------------------------*/ |
614 | ||
f45a00c6 | 615 | /* --- @mpx_2c@ --- * |
616 | * | |
617 | * Arguments: @mpw *dv, *dvl@ = destination vector | |
618 | * @const mpw *v, *vl@ = source vector | |
619 | * | |
620 | * Returns: --- | |
621 | * | |
622 | * Use: Calculates the two's complement of @v@. | |
623 | */ | |
624 | ||
625 | void mpx_2c(mpw *dv, mpw *dvl, const mpw *v, const mpw *vl) | |
626 | { | |
627 | mpw c = 0; | |
628 | while (dv < dvl && v < vl) | |
629 | *dv++ = c = MPW(~*v++); | |
630 | if (dv < dvl) { | |
631 | if (c > MPW_MAX / 2) | |
632 | c = MPW(~0); | |
633 | while (dv < dvl) | |
634 | *dv++ = c; | |
635 | } | |
636 | MPX_UADDN(dv, dvl, 1); | |
637 | } | |
638 | ||
1a05a8ef | 639 | /* --- @mpx_ueq@ --- * |
640 | * | |
641 | * Arguments: @const mpw *av, *avl@ = first argument vector base and limit | |
642 | * @const mpw *bv, *bvl@ = second argument vector base and limit | |
643 | * | |
644 | * Returns: Nonzero if the two vectors are equal. | |
645 | * | |
646 | * Use: Performs an unsigned integer test for equality. | |
647 | */ | |
648 | ||
649 | int mpx_ueq(const mpw *av, const mpw *avl, const mpw *bv, const mpw *bvl) | |
650 | { | |
651 | MPX_SHRINK(av, avl); | |
652 | MPX_SHRINK(bv, bvl); | |
653 | if (avl - av != bvl - bv) | |
654 | return (0); | |
655 | while (av < avl) { | |
656 | if (*av++ != *bv++) | |
657 | return (0); | |
658 | } | |
659 | return (1); | |
660 | } | |
661 | ||
d03ab969 | 662 | /* --- @mpx_ucmp@ --- * |
663 | * | |
664 | * Arguments: @const mpw *av, *avl@ = first argument vector base and limit | |
665 | * @const mpw *bv, *bvl@ = second argument vector base and limit | |
666 | * | |
667 | * Returns: Less than, equal to, or greater than zero depending on | |
668 | * whether @a@ is less than, equal to or greater than @b@, | |
669 | * respectively. | |
670 | * | |
671 | * Use: Performs an unsigned integer comparison. | |
672 | */ | |
673 | ||
674 | int mpx_ucmp(const mpw *av, const mpw *avl, const mpw *bv, const mpw *bvl) | |
675 | { | |
676 | MPX_SHRINK(av, avl); | |
677 | MPX_SHRINK(bv, bvl); | |
678 | ||
679 | if (avl - av > bvl - bv) | |
680 | return (+1); | |
681 | else if (avl - av < bvl - bv) | |
682 | return (-1); | |
683 | else while (avl > av) { | |
684 | mpw a = *--avl, b = *--bvl; | |
685 | if (a > b) | |
686 | return (+1); | |
687 | else if (a < b) | |
688 | return (-1); | |
689 | } | |
690 | return (0); | |
691 | } | |
1a05a8ef | 692 | |
d03ab969 | 693 | /* --- @mpx_uadd@ --- * |
694 | * | |
695 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
696 | * @const mpw *av, *avl@ = first addend vector base and limit | |
697 | * @const mpw *bv, *bvl@ = second addend vector base and limit | |
698 | * | |
699 | * Returns: --- | |
700 | * | |
701 | * Use: Performs unsigned integer addition. If the result overflows | |
702 | * the destination vector, high-order bits are discarded. This | |
703 | * means that two's complement addition happens more or less for | |
704 | * free, although that's more a side-effect than anything else. | |
705 | * The result vector may be equal to either or both source | |
706 | * vectors, but may not otherwise overlap them. | |
707 | */ | |
708 | ||
709 | void mpx_uadd(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, | |
710 | const mpw *bv, const mpw *bvl) | |
711 | { | |
712 | mpw c = 0; | |
713 | ||
714 | while (av < avl || bv < bvl) { | |
715 | mpw a, b; | |
716 | mpd x; | |
717 | if (dv >= dvl) | |
718 | return; | |
719 | a = (av < avl) ? *av++ : 0; | |
720 | b = (bv < bvl) ? *bv++ : 0; | |
721 | x = (mpd)a + (mpd)b + c; | |
722 | *dv++ = MPW(x); | |
723 | c = x >> MPW_BITS; | |
724 | } | |
725 | if (dv < dvl) { | |
726 | *dv++ = c; | |
727 | MPX_ZERO(dv, dvl); | |
728 | } | |
729 | } | |
730 | ||
dd517851 | 731 | /* --- @mpx_uaddn@ --- * |
732 | * | |
733 | * Arguments: @mpw *dv, *dvl@ = source and destination base and limit | |
734 | * @mpw n@ = other addend | |
735 | * | |
736 | * Returns: --- | |
737 | * | |
738 | * Use: Adds a small integer to a multiprecision number. | |
739 | */ | |
740 | ||
741 | void mpx_uaddn(mpw *dv, mpw *dvl, mpw n) { MPX_UADDN(dv, dvl, n); } | |
742 | ||
f46efa79 | 743 | /* --- @mpx_uaddnlsl@ --- * |
744 | * | |
745 | * Arguments: @mpw *dv, *dvl@ = destination and first argument vector | |
746 | * @mpw a@ = second argument | |
747 | * @unsigned o@ = offset in bits | |
748 | * | |
749 | * Returns: --- | |
750 | * | |
751 | * Use: Computes %$d + 2^o a$%. If the result overflows then | |
752 | * high-order bits are discarded, as usual. We must have | |
753 | * @0 < o < MPW_BITS@. | |
754 | */ | |
755 | ||
756 | void mpx_uaddnlsl(mpw *dv, mpw *dvl, mpw a, unsigned o) | |
757 | { | |
758 | mpd x = (mpd)a << o; | |
759 | ||
760 | while (x && dv < dvl) { | |
761 | x += *dv; | |
762 | *dv++ = MPW(x); | |
763 | x >>= MPW_BITS; | |
764 | } | |
765 | } | |
766 | ||
d03ab969 | 767 | /* --- @mpx_usub@ --- * |
768 | * | |
769 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
770 | * @const mpw *av, *avl@ = first argument vector base and limit | |
771 | * @const mpw *bv, *bvl@ = second argument vector base and limit | |
772 | * | |
773 | * Returns: --- | |
774 | * | |
775 | * Use: Performs unsigned integer subtraction. If the result | |
776 | * overflows the destination vector, high-order bits are | |
777 | * discarded. This means that two's complement subtraction | |
778 | * happens more or less for free, althuogh that's more a side- | |
779 | * effect than anything else. The result vector may be equal to | |
780 | * either or both source vectors, but may not otherwise overlap | |
781 | * them. | |
782 | */ | |
783 | ||
784 | void mpx_usub(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, | |
785 | const mpw *bv, const mpw *bvl) | |
786 | { | |
787 | mpw c = 0; | |
788 | ||
789 | while (av < avl || bv < bvl) { | |
790 | mpw a, b; | |
791 | mpd x; | |
792 | if (dv >= dvl) | |
793 | return; | |
794 | a = (av < avl) ? *av++ : 0; | |
795 | b = (bv < bvl) ? *bv++ : 0; | |
c8a2f9ef | 796 | x = (mpd)a - (mpd)b - c; |
d03ab969 | 797 | *dv++ = MPW(x); |
c8a2f9ef | 798 | if (x >> MPW_BITS) |
799 | c = 1; | |
800 | else | |
801 | c = 0; | |
d03ab969 | 802 | } |
c8a2f9ef | 803 | if (c) |
804 | c = MPW_MAX; | |
d03ab969 | 805 | while (dv < dvl) |
c8a2f9ef | 806 | *dv++ = c; |
d03ab969 | 807 | } |
808 | ||
dd517851 | 809 | /* --- @mpx_usubn@ --- * |
810 | * | |
811 | * Arguments: @mpw *dv, *dvl@ = source and destination base and limit | |
812 | * @n@ = subtrahend | |
813 | * | |
814 | * Returns: --- | |
815 | * | |
816 | * Use: Subtracts a small integer from a multiprecision number. | |
817 | */ | |
818 | ||
819 | void mpx_usubn(mpw *dv, mpw *dvl, mpw n) { MPX_USUBN(dv, dvl, n); } | |
820 | ||
67e6eee2 | 821 | /* --- @mpx_usubnlsl@ --- * |
f46efa79 | 822 | * |
823 | * Arguments: @mpw *dv, *dvl@ = destination and first argument vector | |
824 | * @mpw a@ = second argument | |
825 | * @unsigned o@ = offset in bits | |
826 | * | |
827 | * Returns: --- | |
828 | * | |
829 | * Use: Computes %$d + 2^o a$%. If the result overflows then | |
830 | * high-order bits are discarded, as usual. We must have | |
831 | * @0 < o < MPW_BITS@. | |
832 | */ | |
833 | ||
834 | void mpx_usubnlsl(mpw *dv, mpw *dvl, mpw a, unsigned o) | |
835 | { | |
836 | mpw b = a >> (MPW_BITS - o); | |
837 | a <<= o; | |
838 | ||
839 | if (dv < dvl) { | |
c29970a7 | 840 | mpd x = (mpd)*dv - MPW(a); |
f46efa79 | 841 | *dv++ = MPW(x); |
842 | if (x >> MPW_BITS) | |
843 | b++; | |
844 | MPX_USUBN(dv, dvl, b); | |
845 | } | |
846 | } | |
847 | ||
d03ab969 | 848 | /* --- @mpx_umul@ --- * |
849 | * | |
850 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
851 | * @const mpw *av, *avl@ = multiplicand vector base and limit | |
852 | * @const mpw *bv, *bvl@ = multiplier vector base and limit | |
853 | * | |
854 | * Returns: --- | |
855 | * | |
856 | * Use: Performs unsigned integer multiplication. If the result | |
857 | * overflows the desination vector, high-order bits are | |
858 | * discarded. The result vector may not overlap the argument | |
859 | * vectors in any way. | |
860 | */ | |
861 | ||
444083ae MW |
862 | CPU_DISPATCH(EMPTY, (void), void, mpx_umul, |
863 | (mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, | |
864 | const mpw *bv, const mpw *bvl), | |
865 | (dv, dvl, av, avl, bv, bvl), pick_umul, simple_umul); | |
866 | ||
867 | static void simple_umul(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, | |
868 | const mpw *bv, const mpw *bvl) | |
d03ab969 | 869 | { |
870 | /* --- This is probably worthwhile on a multiply --- */ | |
871 | ||
872 | MPX_SHRINK(av, avl); | |
873 | MPX_SHRINK(bv, bvl); | |
874 | ||
875 | /* --- Deal with a multiply by zero --- */ | |
45c0fd36 | 876 | |
d03ab969 | 877 | if (bv == bvl) { |
c8a2f9ef | 878 | MPX_ZERO(dv, dvl); |
d03ab969 | 879 | return; |
880 | } | |
881 | ||
882 | /* --- Do the initial multiply and initialize the accumulator --- */ | |
883 | ||
884 | MPX_UMULN(dv, dvl, av, avl, *bv++); | |
885 | ||
886 | /* --- Do the remaining multiply/accumulates --- */ | |
887 | ||
c8a2f9ef | 888 | while (dv < dvl && bv < bvl) { |
d03ab969 | 889 | mpw m = *bv++; |
c8a2f9ef | 890 | mpw c = 0; |
d03ab969 | 891 | const mpw *avv = av; |
892 | mpw *dvv = ++dv; | |
893 | ||
894 | while (avv < avl) { | |
895 | mpd x; | |
896 | if (dvv >= dvl) | |
897 | goto next; | |
c8a2f9ef | 898 | x = (mpd)*dvv + (mpd)m * (mpd)*avv++ + c; |
899 | *dvv++ = MPW(x); | |
d03ab969 | 900 | c = x >> MPW_BITS; |
901 | } | |
c8a2f9ef | 902 | MPX_UADDN(dvv, dvl, c); |
d03ab969 | 903 | next:; |
904 | } | |
905 | } | |
906 | ||
444083ae MW |
/* --- @MAYBE_UMUL4@ --- *
 *
 * Arguments:	@impl@ = suffix naming an external @mpx_umul4_...@ routine
 *
 * Use:		Declares the external routine and defines a wrapper
 *		@maybe_umul4_...@ with the same signature as @simple_umul@.
 *		The wrapper calls the external routine only if both operand
 *		lengths are nonzero multiples of four words and the
 *		destination can hold the sum of the two lengths, and then
 *		zeroes whatever is left of the destination; in every other
 *		case it hands the job to @simple_umul@.
 */

#define MAYBE_UMUL4(impl)						\
  extern void mpx_umul4_##impl(mpw */*dv*/,				\
			       const mpw */*av*/, const mpw */*avl*/,	\
			       const mpw */*bv*/, const mpw */*bvl*/);	\
  static void maybe_umul4_##impl(mpw *dv, mpw *dvl,			\
				 const mpw *av, const mpw *avl,		\
				 const mpw *bv, const mpw *bvl)		\
  {									\
    size_t alen = avl - av;						\
    size_t blen = bvl - bv;						\
    size_t dlen = dvl - dv;						\
									\
    if (alen && alen%4 == 0 && blen && blen%4 == 0 &&			\
	dlen >= alen + blen) {						\
      mpx_umul4_##impl(dv, av, avl, bv, bvl);				\
      MPX_ZERO(dv + alen + blen, dvl);					\
    } else								\
      simple_umul(dv, dvl, av, avl, bv, bvl);				\
  }

/* --- One wrapper for each routine available on this CPU family --- */

#if CPUFAM_X86
  MAYBE_UMUL4(x86_sse2)
  MAYBE_UMUL4(x86_avx)
#endif

#if CPUFAM_AMD64
  MAYBE_UMUL4(amd64_sse2)
  MAYBE_UMUL4(amd64_avx)
#endif

#if CPUFAM_ARMEL
  MAYBE_UMUL4(arm_neon)
#endif

#if CPUFAM_ARM64
  MAYBE_UMUL4(arm64_simd)
#endif
941 | ||
444083ae MW |
942 | static mpx_umul__functype *pick_umul(void) |
943 | { | |
944 | #if CPUFAM_X86 | |
b9b279b4 MW |
945 | DISPATCH_PICK_COND(mpx_umul, maybe_umul4_x86_avx, |
946 | cpu_feature_p(CPUFEAT_X86_AVX)); | |
444083ae MW |
947 | DISPATCH_PICK_COND(mpx_umul, maybe_umul4_x86_sse2, |
948 | cpu_feature_p(CPUFEAT_X86_SSE2)); | |
3119b3ae MW |
949 | #endif |
950 | #if CPUFAM_AMD64 | |
b9b279b4 MW |
951 | DISPATCH_PICK_COND(mpx_umul, maybe_umul4_amd64_avx, |
952 | cpu_feature_p(CPUFEAT_X86_AVX)); | |
3119b3ae MW |
953 | DISPATCH_PICK_COND(mpx_umul, maybe_umul4_amd64_sse2, |
954 | cpu_feature_p(CPUFEAT_X86_SSE2)); | |
ea1b3cec MW |
955 | #endif |
956 | #if CPUFAM_ARMEL | |
957 | DISPATCH_PICK_COND(mpx_umul, maybe_umul4_arm_neon, | |
958 | cpu_feature_p(CPUFEAT_ARM_NEON)); | |
959 | #endif | |
960 | #if CPUFAM_ARM64 | |
54d36e7b MW |
961 | DISPATCH_PICK_COND(mpx_umul, maybe_umul4_arm64_simd, |
962 | cpu_feature_p(CPUFEAT_ARM_NEON)); | |
444083ae MW |
963 | #endif |
964 | DISPATCH_PICK_FALLBACK(mpx_umul, simple_umul); | |
965 | } | |
966 | ||
dd517851 | 967 | /* --- @mpx_umuln@ --- * |
968 | * | |
969 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
970 | * @const mpw *av, *avl@ = multiplicand vector base and limit | |
971 | * @mpw m@ = multiplier | |
972 | * | |
973 | * Returns: --- | |
974 | * | |
975 | * Use: Multiplies a multiprecision integer by a single-word value. | |
976 | * The destination and source may be equal. The destination | |
977 | * is completely cleared after use. | |
978 | */ | |
979 | ||
980 | void mpx_umuln(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, mpw m) | |
106b481c | 981 | { MPX_UMULN(dv, dvl, av, avl, m); } |
dd517851 | 982 | |
983 | /* --- @mpx_umlan@ --- * | |
984 | * | |
985 | * Arguments: @mpw *dv, *dvl@ = destination/accumulator base and limit | |
986 | * @const mpw *av, *avl@ = multiplicand vector base and limit | |
987 | * @mpw m@ = multiplier | |
988 | * | |
989 | * Returns: --- | |
990 | * | |
991 | * Use: Multiplies a multiprecision integer by a single-word value | |
992 | * and adds the result to an accumulator. | |
993 | */ | |
994 | ||
995 | void mpx_umlan(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, mpw m) | |
106b481c | 996 | { MPX_UMLAN(dv, dvl, av, avl, m); } |
dd517851 | 997 | |
c8a2f9ef | 998 | /* --- @mpx_usqr@ --- * |
999 | * | |
1000 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
1001 | * @const mpw *av, *av@ = source vector base and limit | |
1002 | * | |
1003 | * Returns: --- | |
1004 | * | |
1005 | * Use: Performs unsigned integer squaring. The result vector must | |
1006 | * not overlap the source vector in any way. | |
1007 | */ | |
1008 | ||
1009 | void mpx_usqr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl) | |
1010 | { | |
1011 | MPX_ZERO(dv, dvl); | |
1012 | ||
1013 | /* --- Main loop --- */ | |
1014 | ||
1015 | while (av < avl) { | |
1016 | const mpw *avv = av; | |
1017 | mpw *dvv = dv; | |
1018 | mpw a = *av; | |
1019 | mpd c; | |
1020 | ||
1021 | /* --- Stop if I've run out of destination --- */ | |
1022 | ||
1023 | if (dvv >= dvl) | |
1024 | break; | |
1025 | ||
1026 | /* --- Work out the square at this point in the proceedings --- */ | |
1027 | ||
1028 | { | |
c8a2f9ef | 1029 | mpd x = (mpd)a * (mpd)a + *dvv; |
1030 | *dvv++ = MPW(x); | |
1031 | c = MPW(x >> MPW_BITS); | |
1032 | } | |
1033 | ||
1034 | /* --- Now fix up the rest of the vector upwards --- */ | |
1035 | ||
1036 | avv++; | |
1037 | while (dvv < dvl && avv < avl) { | |
c8a2f9ef | 1038 | mpd x = (mpd)a * (mpd)*avv++; |
1039 | mpd y = ((x << 1) & MPW_MAX) + c + *dvv; | |
1040 | c = (x >> (MPW_BITS - 1)) + (y >> MPW_BITS); | |
1041 | *dvv++ = MPW(y); | |
1042 | } | |
1043 | while (dvv < dvl && c) { | |
1044 | mpd x = c + *dvv; | |
1045 | *dvv++ = MPW(x); | |
1046 | c = x >> MPW_BITS; | |
1047 | } | |
1048 | ||
1049 | /* --- Get ready for the next round --- */ | |
1050 | ||
1051 | av++; | |
1052 | dv += 2; | |
1053 | } | |
1054 | } | |
1055 | ||
d03ab969 | 1056 | /* --- @mpx_udiv@ --- * |
1057 | * | |
1058 | * Arguments: @mpw *qv, *qvl@ = quotient vector base and limit | |
1059 | * @mpw *rv, *rvl@ = dividend/remainder vector base and limit | |
1060 | * @const mpw *dv, *dvl@ = divisor vector base and limit | |
c8a2f9ef | 1061 | * @mpw *sv, *svl@ = scratch workspace |
d03ab969 | 1062 | * |
1063 | * Returns: --- | |
1064 | * | |
1065 | * Use: Performs unsigned integer division. If the result overflows | |
1066 | * the quotient vector, high-order bits are discarded. (Clearly | |
1067 | * the remainder vector can't overflow.) The various vectors | |
1068 | * may not overlap in any way. Yes, I know it's a bit odd | |
1069 | * requiring the dividend to be in the result position but it | |
1070 | * does make some sense really. The remainder must have | |
c8a2f9ef | 1071 | * headroom for at least two extra words. The scratch space |
f45a00c6 | 1072 | * must be at least one word larger than the divisor. |
d03ab969 | 1073 | */ |
1074 | ||
1075 | void mpx_udiv(mpw *qv, mpw *qvl, mpw *rv, mpw *rvl, | |
c8a2f9ef | 1076 | const mpw *dv, const mpw *dvl, |
1077 | mpw *sv, mpw *svl) | |
d03ab969 | 1078 | { |
d03ab969 | 1079 | unsigned norm = 0; |
1080 | size_t scale; | |
1081 | mpw d, dd; | |
1082 | ||
1083 | /* --- Initialize the quotient --- */ | |
1084 | ||
1085 | MPX_ZERO(qv, qvl); | |
1086 | ||
c8a2f9ef | 1087 | /* --- Perform some sanity checks --- */ |
1088 | ||
1089 | MPX_SHRINK(dv, dvl); | |
1090 | assert(((void)"division by zero in mpx_udiv", dv < dvl)); | |
1091 | ||
d03ab969 | 1092 | /* --- Normalize the divisor --- * |
1093 | * | |
1094 | * The algorithm requires that the divisor be at least two digits long. | |
1095 | * This is easy to fix. | |
1096 | */ | |
1097 | ||
c8a2f9ef | 1098 | { |
1099 | unsigned b; | |
d03ab969 | 1100 | |
c8a2f9ef | 1101 | d = dvl[-1]; |
c29970a7 | 1102 | for (b = MPW_P2; b; b >>= 1) { |
34e4f738 | 1103 | if (d <= (MPW_MAX >> b)) { |
c8a2f9ef | 1104 | d <<= b; |
1105 | norm += b; | |
1106 | } | |
1107 | } | |
1108 | if (dv + 1 == dvl) | |
1109 | norm += MPW_BITS; | |
d03ab969 | 1110 | } |
d03ab969 | 1111 | |
1112 | /* --- Normalize the dividend/remainder to match --- */ | |
1113 | ||
c8a2f9ef | 1114 | if (norm) { |
c8a2f9ef | 1115 | mpx_lsl(rv, rvl, rv, rvl, norm); |
f45a00c6 | 1116 | mpx_lsl(sv, svl, dv, dvl, norm); |
c8a2f9ef | 1117 | dv = sv; |
f45a00c6 | 1118 | dvl = svl; |
c8a2f9ef | 1119 | MPX_SHRINK(dv, dvl); |
1120 | } | |
1121 | ||
d03ab969 | 1122 | MPX_SHRINK(rv, rvl); |
c8a2f9ef | 1123 | d = dvl[-1]; |
1124 | dd = dvl[-2]; | |
d03ab969 | 1125 | |
1126 | /* --- Work out the relative scales --- */ | |
1127 | ||
1128 | { | |
1129 | size_t rvn = rvl - rv; | |
c8a2f9ef | 1130 | size_t dvn = dvl - dv; |
d03ab969 | 1131 | |
1132 | /* --- If the divisor is clearly larger, notice this --- */ | |
1133 | ||
1134 | if (dvn > rvn) { | |
1135 | mpx_lsr(rv, rvl, rv, rvl, norm); | |
1136 | return; | |
1137 | } | |
1138 | ||
1139 | scale = rvn - dvn; | |
1140 | } | |
1141 | ||
1142 | /* --- Calculate the most significant quotient digit --- * | |
1143 | * | |
1144 | * Because the divisor has its top bit set, this can only happen once. The | |
1145 | * pointer arithmetic is a little contorted, to make sure that the | |
1146 | * behaviour is defined. | |
1147 | */ | |
1148 | ||
1149 | if (MPX_UCMP(rv + scale, rvl, >=, dv, dvl)) { | |
1150 | mpx_usub(rv + scale, rvl, rv + scale, rvl, dv, dvl); | |
1151 | if (qvl - qv > scale) | |
1152 | qv[scale] = 1; | |
1153 | } | |
1154 | ||
1155 | /* --- Now for the main loop --- */ | |
1156 | ||
1157 | { | |
c8a2f9ef | 1158 | mpw *rvv = rvl - 2; |
d03ab969 | 1159 | |
1160 | while (scale) { | |
c8a2f9ef | 1161 | mpw q; |
1162 | mpd rh; | |
d03ab969 | 1163 | |
1164 | /* --- Get an estimate for the next quotient digit --- */ | |
1165 | ||
c8a2f9ef | 1166 | mpw r = rvv[1]; |
1167 | mpw rr = rvv[0]; | |
1168 | mpw rrr = *--rvv; | |
1169 | ||
1170 | scale--; | |
1171 | rh = ((mpd)r << MPW_BITS) | rr; | |
d03ab969 | 1172 | if (r == d) |
1173 | q = MPW_MAX; | |
c8a2f9ef | 1174 | else |
1175 | q = MPW(rh / d); | |
d03ab969 | 1176 | |
1177 | /* --- Refine the estimate --- */ | |
1178 | ||
1179 | { | |
1180 | mpd yh = (mpd)d * q; | |
ce76ff16 | 1181 | mpd yy = (mpd)dd * q; |
1182 | mpw yl; | |
c8a2f9ef | 1183 | |
ce76ff16 | 1184 | if (yy > MPW_MAX) |
1185 | yh += yy >> MPW_BITS; | |
1186 | yl = MPW(yy); | |
c8a2f9ef | 1187 | |
1188 | while (yh > rh || (yh == rh && yl > rrr)) { | |
1189 | q--; | |
1190 | yh -= d; | |
ce76ff16 | 1191 | if (yl < dd) |
1192 | yh--; | |
99b30c23 | 1193 | yl = MPW(yl - dd); |
c8a2f9ef | 1194 | } |
1195 | } | |
1196 | ||
1197 | /* --- Remove a chunk from the dividend --- */ | |
1198 | ||
1199 | { | |
1200 | mpw *svv; | |
1201 | const mpw *dvv; | |
f45a00c6 | 1202 | mpw mc = 0, sc = 0; |
c8a2f9ef | 1203 | |
f45a00c6 | 1204 | /* --- Calculate the size of the chunk --- * |
1205 | * | |
1206 | * This does the whole job of calculating @r >> scale - qd@. | |
1207 | */ | |
c8a2f9ef | 1208 | |
f45a00c6 | 1209 | for (svv = rv + scale, dvv = dv; |
1210 | dvv < dvl && svv < rvl; | |
1211 | svv++, dvv++) { | |
1212 | mpd x = (mpd)*dvv * (mpd)q + mc; | |
1213 | mc = x >> MPW_BITS; | |
1214 | x = (mpd)*svv - MPW(x) - sc; | |
c8a2f9ef | 1215 | *svv = MPW(x); |
f45a00c6 | 1216 | if (x >> MPW_BITS) |
1217 | sc = 1; | |
1218 | else | |
1219 | sc = 0; | |
1220 | } | |
1221 | ||
1222 | if (svv < rvl) { | |
1223 | mpd x = (mpd)*svv - mc - sc; | |
1224 | *svv++ = MPW(x); | |
1225 | if (x >> MPW_BITS) | |
1226 | sc = MPW_MAX; | |
1227 | else | |
1228 | sc = 0; | |
1229 | while (svv < rvl) | |
1230 | *svv++ = sc; | |
c8a2f9ef | 1231 | } |
c8a2f9ef | 1232 | |
f45a00c6 | 1233 | /* --- Fix if the quotient was too large --- * |
c8a2f9ef | 1234 | * |
f45a00c6 | 1235 | * This doesn't seem to happen very often. |
c8a2f9ef | 1236 | */ |
1237 | ||
c8a2f9ef | 1238 | if (rvl[-1] > MPW_MAX / 2) { |
1239 | mpx_uadd(rv + scale, rvl, rv + scale, rvl, dv, dvl); | |
1240 | q--; | |
1241 | } | |
1242 | } | |
1243 | ||
1244 | /* --- Done for another iteration --- */ | |
1245 | ||
1246 | if (qvl - qv > scale) | |
1247 | qv[scale] = q; | |
1248 | r = rr; | |
1249 | rr = rrr; | |
1250 | } | |
1251 | } | |
1252 | ||
1253 | /* --- Now fiddle with unnormalizing and things --- */ | |
1254 | ||
1255 | mpx_lsr(rv, rvl, rv, rvl, norm); | |
d03ab969 | 1256 | } |
1257 | ||
698bd937 | 1258 | /* --- @mpx_udivn@ --- * |
1259 | * | |
1260 | * Arguments: @mpw *qv, *qvl@ = storage for the quotient (may overlap | |
1261 | * dividend) | |
1262 | * @const mpw *rv, *rvl@ = dividend | |
1263 | * @mpw d@ = single-precision divisor | |
1264 | * | |
1265 | * Returns: Remainder after divison. | |
1266 | * | |
1267 | * Use: Performs a single-precision division operation. | |
1268 | */ | |
1269 | ||
1270 | mpw mpx_udivn(mpw *qv, mpw *qvl, const mpw *rv, const mpw *rvl, mpw d) | |
1271 | { | |
1272 | size_t i; | |
1273 | size_t ql = qvl - qv; | |
1274 | mpd r = 0; | |
1275 | ||
1276 | i = rvl - rv; | |
1277 | while (i > 0) { | |
1278 | i--; | |
1279 | r = (r << MPW_BITS) | rv[i]; | |
1280 | if (i < ql) | |
1281 | qv[i] = r / d; | |
1282 | r %= d; | |
1283 | } | |
1284 | return (MPW(r)); | |
1285 | } | |
1286 | ||
42684bdb | 1287 | /*----- Test rig ----------------------------------------------------------*/ |
1288 | ||
1289 | #ifdef TEST_RIG | |
1290 | ||
1291 | #include <mLib/alloc.h> | |
1292 | #include <mLib/dstr.h> | |
141c1284 | 1293 | #include <mLib/macros.h> |
42684bdb | 1294 | #include <mLib/quis.h> |
1295 | #include <mLib/testrig.h> | |
1296 | ||
416b8869 MW |
1297 | #ifdef ENABLE_ASM_DEBUG |
1298 | # include "regdump.h" | |
1299 | #endif | |
1300 | ||
42684bdb | 1301 | #include "mpscan.h" |
1302 | ||
/* Allocate a vector of @sz@ words with @xmalloc@, fill it with 0xa5 bytes,
 * and store its base and limit in @v@ and @vl@.
 */
#define ALLOC(v, vl, sz) do {						\
  size_t _nw = (sz);							\
  mpw *_base = xmalloc(MPWS(_nw));					\
  memset(_base, 0xa5, MPWS(_nw));					\
  (v) = _base;								\
  (vl) = _base + _nw;							\
} while (0)
1311 | ||
/* Allocate a vector big enough for the contents of the string @d@, read
 * the string into it using @mpx_loadb@, and store the vector's base and
 * limit in @v@ and @vl@.
 */
#define LOAD(v, vl, d) do {						\
  const dstr *_src = (d);						\
  mpw *_w, *_wl;							\
  ALLOC(_w, _wl, MPW_RQ(_src->len));					\
  mpx_loadb(_w, _wl, _src->buf, _src->len);				\
  (v) = _w;								\
  (vl) = _wl;								\
} while (0)
1320 | ||
/* The larger of @x@ and @y@.  Beware: one of the arguments is evaluated
 * twice.
 */
#define MAX(x, y) ((y) < (x) ? (x) : (y))
45c0fd36 | 1322 | |
42684bdb | 1323 | static void dumpbits(const char *msg, const void *pp, size_t sz) |
1324 | { | |
1325 | const octet *p = pp; | |
1326 | fputs(msg, stderr); | |
1327 | for (; sz; sz--) | |
1328 | fprintf(stderr, " %02x", *p++); | |
1329 | fputc('\n', stderr); | |
1330 | } | |
1331 | ||
1332 | static void dumpmp(const char *msg, const mpw *v, const mpw *vl) | |
1333 | { | |
1334 | fputs(msg, stderr); | |
1335 | MPX_SHRINK(v, vl); | |
1336 | while (v < vl) | |
1337 | fprintf(stderr, " %08lx", (unsigned long)*--vl); | |
1338 | fputc('\n', stderr); | |
1339 | } | |
1340 | ||
1341 | static int chkscan(const mpw *v, const mpw *vl, | |
1342 | const void *pp, size_t sz, int step) | |
1343 | { | |
1344 | mpscan mps; | |
1345 | const octet *p = pp; | |
1346 | unsigned bit = 0; | |
1347 | int ok = 1; | |
1348 | ||
1349 | mpscan_initx(&mps, v, vl); | |
1350 | while (sz) { | |
1351 | unsigned x = *p; | |
1352 | int i; | |
1353 | p += step; | |
1354 | for (i = 0; i < 8 && MPSCAN_STEP(&mps); i++) { | |
1355 | if (MPSCAN_BIT(&mps) != (x & 1)) { | |
1356 | fprintf(stderr, | |
1357 | "\n*** error, step %i, bit %u, expected %u, found %u\n", | |
1358 | step, bit, x & 1, MPSCAN_BIT(&mps)); | |
1359 | ok = 0; | |
1360 | } | |
1361 | x >>= 1; | |
1362 | bit++; | |
1363 | } | |
1364 | sz--; | |
1365 | } | |
1366 | ||
1367 | return (ok); | |
1368 | } | |
1369 | ||
1370 | static int loadstore(dstr *v) | |
1371 | { | |
1372 | dstr d = DSTR_INIT; | |
1373 | size_t sz = MPW_RQ(v->len) * 2, diff; | |
1374 | mpw *m, *ml; | |
1375 | int ok = 1; | |
1376 | ||
1377 | dstr_ensure(&d, v->len); | |
1378 | m = xmalloc(MPWS(sz)); | |
1379 | ||
1380 | for (diff = 0; diff < sz; diff += 5) { | |
1381 | size_t oct; | |
1382 | ||
1383 | ml = m + sz - diff; | |
1384 | ||
1385 | mpx_loadl(m, ml, v->buf, v->len); | |
1386 | if (!chkscan(m, ml, v->buf, v->len, +1)) | |
1387 | ok = 0; | |
1388 | MPX_OCTETS(oct, m, ml); | |
1389 | mpx_storel(m, ml, d.buf, d.sz); | |
141c1284 | 1390 | if (MEMCMP(d.buf, !=, v->buf, oct)) { |
42684bdb | 1391 | dumpbits("\n*** storel failed", d.buf, d.sz); |
1392 | ok = 0; | |
1393 | } | |
1394 | ||
1395 | mpx_loadb(m, ml, v->buf, v->len); | |
1396 | if (!chkscan(m, ml, v->buf + v->len - 1, v->len, -1)) | |
1397 | ok = 0; | |
1398 | MPX_OCTETS(oct, m, ml); | |
1399 | mpx_storeb(m, ml, d.buf, d.sz); | |
141c1284 | 1400 | if (MEMCMP(d.buf + d.sz - oct, !=, v->buf + v->len - oct, oct)) { |
42684bdb | 1401 | dumpbits("\n*** storeb failed", d.buf, d.sz); |
1402 | ok = 0; | |
1403 | } | |
1404 | } | |
1405 | ||
1406 | if (!ok) | |
1407 | dumpbits("input data", v->buf, v->len); | |
1408 | ||
12ed8a1f | 1409 | xfree(m); |
42684bdb | 1410 | dstr_destroy(&d); |
1411 | return (ok); | |
1412 | } | |
1413 | ||
f09e814a | 1414 | static int twocl(dstr *v) |
1415 | { | |
1416 | dstr d = DSTR_INIT; | |
850dc272 MW |
1417 | mpw *m, *ml0, *ml1; |
1418 | size_t sz0, sz1, szmax; | |
f09e814a | 1419 | int ok = 1; |
850dc272 | 1420 | int i; |
f09e814a | 1421 | |
850dc272 MW |
1422 | sz0 = MPW_RQ(v[0].len); sz1 = MPW_RQ(v[1].len); |
1423 | dstr_ensure(&d, v[0].len > v[1].len ? v[0].len : v[1].len); | |
f09e814a | 1424 | |
850dc272 MW |
1425 | szmax = sz0 > sz1 ? sz0 : sz1; |
1426 | m = xmalloc(MPWS(szmax)); | |
1427 | ml0 = m + sz0; ml1 = m + sz1; | |
f09e814a | 1428 | |
850dc272 MW |
1429 | for (i = 0; i < 2; i++) { |
1430 | if (i) ml0 = ml1 = m + szmax; | |
f09e814a | 1431 | |
850dc272 MW |
1432 | mpx_loadl(m, ml0, v[0].buf, v[0].len); |
1433 | mpx_storel2cn(m, ml0, d.buf, v[1].len); | |
141c1284 | 1434 | if (MEMCMP(d.buf, !=, v[1].buf, v[1].len)) { |
850dc272 MW |
1435 | dumpbits("\n*** storel2cn failed", d.buf, v[1].len); |
1436 | ok = 0; | |
1437 | } | |
1438 | ||
1439 | mpx_loadl2cn(m, ml1, v[1].buf, v[1].len); | |
1440 | mpx_storel(m, ml1, d.buf, v[0].len); | |
141c1284 | 1441 | if (MEMCMP(d.buf, !=, v[0].buf, v[0].len)) { |
850dc272 MW |
1442 | dumpbits("\n*** loadl2cn failed", d.buf, v[0].len); |
1443 | ok = 0; | |
1444 | } | |
f09e814a | 1445 | } |
1446 | ||
1447 | if (!ok) { | |
1448 | dumpbits("pos", v[0].buf, v[0].len); | |
1449 | dumpbits("neg", v[1].buf, v[1].len); | |
1450 | } | |
1451 | ||
12ed8a1f | 1452 | xfree(m); |
f09e814a | 1453 | dstr_destroy(&d); |
1454 | ||
1455 | return (ok); | |
1456 | } | |
1457 | ||
1458 | static int twocb(dstr *v) | |
1459 | { | |
1460 | dstr d = DSTR_INIT; | |
850dc272 MW |
1461 | mpw *m, *ml0, *ml1; |
1462 | size_t sz0, sz1, szmax; | |
f09e814a | 1463 | int ok = 1; |
850dc272 | 1464 | int i; |
f09e814a | 1465 | |
850dc272 MW |
1466 | sz0 = MPW_RQ(v[0].len); sz1 = MPW_RQ(v[1].len); |
1467 | dstr_ensure(&d, v[0].len > v[1].len ? v[0].len : v[1].len); | |
f09e814a | 1468 | |
850dc272 MW |
1469 | szmax = sz0 > sz1 ? sz0 : sz1; |
1470 | m = xmalloc(MPWS(szmax)); | |
1471 | ml0 = m + sz0; ml1 = m + sz1; | |
f09e814a | 1472 | |
850dc272 MW |
1473 | for (i = 0; i < 2; i++) { |
1474 | if (i) ml0 = ml1 = m + szmax; | |
1475 | ||
1476 | mpx_loadb(m, ml0, v[0].buf, v[0].len); | |
1477 | mpx_storeb2cn(m, ml0, d.buf, v[1].len); | |
141c1284 | 1478 | if (MEMCMP(d.buf, !=, v[1].buf, v[1].len)) { |
850dc272 MW |
1479 | dumpbits("\n*** storeb2cn failed", d.buf, v[1].len); |
1480 | ok = 0; | |
1481 | } | |
f09e814a | 1482 | |
850dc272 MW |
1483 | mpx_loadb2cn(m, ml1, v[1].buf, v[1].len); |
1484 | mpx_storeb(m, ml1, d.buf, v[0].len); | |
141c1284 | 1485 | if (MEMCMP(d.buf, !=, v[0].buf, v[0].len)) { |
850dc272 MW |
1486 | dumpbits("\n*** loadb2cn failed", d.buf, v[0].len); |
1487 | ok = 0; | |
1488 | } | |
f09e814a | 1489 | } |
1490 | ||
1491 | if (!ok) { | |
1492 | dumpbits("pos", v[0].buf, v[0].len); | |
1493 | dumpbits("neg", v[1].buf, v[1].len); | |
1494 | } | |
1495 | ||
12ed8a1f | 1496 | xfree(m); |
f09e814a | 1497 | dstr_destroy(&d); |
1498 | ||
1499 | return (ok); | |
1500 | } | |
1501 | ||
42684bdb | 1502 | static int lsl(dstr *v) |
1503 | { | |
1504 | mpw *a, *al; | |
1505 | int n = *(int *)v[1].buf; | |
1506 | mpw *c, *cl; | |
1507 | mpw *d, *dl; | |
1508 | int ok = 1; | |
1509 | ||
1510 | LOAD(a, al, &v[0]); | |
1511 | LOAD(c, cl, &v[2]); | |
1512 | ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS); | |
1513 | ||
1514 | mpx_lsl(d, dl, a, al, n); | |
1a05a8ef | 1515 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb | 1516 | fprintf(stderr, "\n*** lsl(%i) failed\n", n); |
45c0fd36 | 1517 | dumpmp(" a", a, al); |
42684bdb | 1518 | dumpmp("expected", c, cl); |
1519 | dumpmp(" result", d, dl); | |
1520 | ok = 0; | |
1521 | } | |
1522 | ||
12ed8a1f | 1523 | xfree(a); xfree(c); xfree(d); |
42684bdb | 1524 | return (ok); |
1525 | } | |
1526 | ||
81578196 | 1527 | static int lslc(dstr *v) |
1528 | { | |
1529 | mpw *a, *al; | |
1530 | int n = *(int *)v[1].buf; | |
1531 | mpw *c, *cl; | |
1532 | mpw *d, *dl; | |
1533 | int ok = 1; | |
1534 | ||
1535 | LOAD(a, al, &v[0]); | |
1536 | LOAD(c, cl, &v[2]); | |
1537 | ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS); | |
1538 | ||
1539 | mpx_lslc(d, dl, a, al, n); | |
1540 | if (!mpx_ueq(d, dl, c, cl)) { | |
1541 | fprintf(stderr, "\n*** lslc(%i) failed\n", n); | |
45c0fd36 | 1542 | dumpmp(" a", a, al); |
81578196 | 1543 | dumpmp("expected", c, cl); |
1544 | dumpmp(" result", d, dl); | |
1545 | ok = 0; | |
1546 | } | |
1547 | ||
12ed8a1f | 1548 | xfree(a); xfree(c); xfree(d); |
81578196 | 1549 | return (ok); |
1550 | } | |
1551 | ||
42684bdb | 1552 | static int lsr(dstr *v) |
1553 | { | |
1554 | mpw *a, *al; | |
1555 | int n = *(int *)v[1].buf; | |
1556 | mpw *c, *cl; | |
1557 | mpw *d, *dl; | |
1558 | int ok = 1; | |
1559 | ||
1560 | LOAD(a, al, &v[0]); | |
1561 | LOAD(c, cl, &v[2]); | |
1562 | ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS + 1); | |
1563 | ||
1564 | mpx_lsr(d, dl, a, al, n); | |
1a05a8ef | 1565 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb | 1566 | fprintf(stderr, "\n*** lsr(%i) failed\n", n); |
45c0fd36 | 1567 | dumpmp(" a", a, al); |
42684bdb | 1568 | dumpmp("expected", c, cl); |
1569 | dumpmp(" result", d, dl); | |
1570 | ok = 0; | |
1571 | } | |
1572 | ||
12ed8a1f | 1573 | xfree(a); xfree(c); xfree(d); |
42684bdb | 1574 | return (ok); |
1575 | } | |
1576 | ||
1577 | static int uadd(dstr *v) | |
1578 | { | |
1579 | mpw *a, *al; | |
1580 | mpw *b, *bl; | |
1581 | mpw *c, *cl; | |
1582 | mpw *d, *dl; | |
1583 | int ok = 1; | |
1584 | ||
1585 | LOAD(a, al, &v[0]); | |
1586 | LOAD(b, bl, &v[1]); | |
1587 | LOAD(c, cl, &v[2]); | |
1588 | ALLOC(d, dl, MAX(al - a, bl - b) + 1); | |
1589 | ||
1590 | mpx_uadd(d, dl, a, al, b, bl); | |
1a05a8ef | 1591 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb | 1592 | fprintf(stderr, "\n*** uadd failed\n"); |
45c0fd36 MW |
1593 | dumpmp(" a", a, al); |
1594 | dumpmp(" b", b, bl); | |
42684bdb | 1595 | dumpmp("expected", c, cl); |
1596 | dumpmp(" result", d, dl); | |
1597 | ok = 0; | |
1598 | } | |
1599 | ||
12ed8a1f | 1600 | xfree(a); xfree(b); xfree(c); xfree(d); |
42684bdb | 1601 | return (ok); |
1602 | } | |
1603 | ||
1604 | static int usub(dstr *v) | |
1605 | { | |
1606 | mpw *a, *al; | |
1607 | mpw *b, *bl; | |
1608 | mpw *c, *cl; | |
1609 | mpw *d, *dl; | |
1610 | int ok = 1; | |
1611 | ||
1612 | LOAD(a, al, &v[0]); | |
1613 | LOAD(b, bl, &v[1]); | |
1614 | LOAD(c, cl, &v[2]); | |
1615 | ALLOC(d, dl, al - a); | |
1616 | ||
1617 | mpx_usub(d, dl, a, al, b, bl); | |
1a05a8ef | 1618 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb | 1619 | fprintf(stderr, "\n*** usub failed\n"); |
45c0fd36 MW |
1620 | dumpmp(" a", a, al); |
1621 | dumpmp(" b", b, bl); | |
42684bdb | 1622 | dumpmp("expected", c, cl); |
1623 | dumpmp(" result", d, dl); | |
1624 | ok = 0; | |
1625 | } | |
1626 | ||
12ed8a1f | 1627 | xfree(a); xfree(b); xfree(c); xfree(d); |
42684bdb | 1628 | return (ok); |
1629 | } | |
1630 | ||
1631 | static int umul(dstr *v) | |
1632 | { | |
1633 | mpw *a, *al; | |
1634 | mpw *b, *bl; | |
1635 | mpw *c, *cl; | |
1636 | mpw *d, *dl; | |
1637 | int ok = 1; | |
1638 | ||
1639 | LOAD(a, al, &v[0]); | |
1640 | LOAD(b, bl, &v[1]); | |
1641 | LOAD(c, cl, &v[2]); | |
1642 | ALLOC(d, dl, (al - a) + (bl - b)); | |
1643 | ||
1644 | mpx_umul(d, dl, a, al, b, bl); | |
1a05a8ef | 1645 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb | 1646 | fprintf(stderr, "\n*** umul failed\n"); |
45c0fd36 MW |
1647 | dumpmp(" a", a, al); |
1648 | dumpmp(" b", b, bl); | |
42684bdb | 1649 | dumpmp("expected", c, cl); |
1650 | dumpmp(" result", d, dl); | |
1651 | ok = 0; | |
1652 | } | |
1653 | ||
12ed8a1f | 1654 | xfree(a); xfree(b); xfree(c); xfree(d); |
42684bdb | 1655 | return (ok); |
1656 | } | |
1657 | ||
1658 | static int usqr(dstr *v) | |
1659 | { | |
1660 | mpw *a, *al; | |
1661 | mpw *c, *cl; | |
1662 | mpw *d, *dl; | |
1663 | int ok = 1; | |
1664 | ||
1665 | LOAD(a, al, &v[0]); | |
1666 | LOAD(c, cl, &v[1]); | |
1667 | ALLOC(d, dl, 2 * (al - a)); | |
1668 | ||
1669 | mpx_usqr(d, dl, a, al); | |
1a05a8ef | 1670 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb | 1671 | fprintf(stderr, "\n*** usqr failed\n"); |
45c0fd36 | 1672 | dumpmp(" a", a, al); |
42684bdb | 1673 | dumpmp("expected", c, cl); |
1674 | dumpmp(" result", d, dl); | |
1675 | ok = 0; | |
1676 | } | |
1677 | ||
12ed8a1f | 1678 | xfree(a); xfree(c); xfree(d); |
42684bdb | 1679 | return (ok); |
1680 | } | |
1681 | ||
1682 | static int udiv(dstr *v) | |
1683 | { | |
1684 | mpw *a, *al; | |
1685 | mpw *b, *bl; | |
1686 | mpw *q, *ql; | |
1687 | mpw *r, *rl; | |
1688 | mpw *qq, *qql; | |
1689 | mpw *s, *sl; | |
1690 | int ok = 1; | |
1691 | ||
1692 | ALLOC(a, al, MPW_RQ(v[0].len) + 2); mpx_loadb(a, al, v[0].buf, v[0].len); | |
1693 | LOAD(b, bl, &v[1]); | |
1694 | LOAD(q, ql, &v[2]); | |
1695 | LOAD(r, rl, &v[3]); | |
1696 | ALLOC(qq, qql, al - a); | |
1697 | ALLOC(s, sl, (bl - b) + 1); | |
1698 | ||
1699 | mpx_udiv(qq, qql, a, al, b, bl, s, sl); | |
1a05a8ef | 1700 | if (!mpx_ueq(qq, qql, q, ql) || |
1701 | !mpx_ueq(a, al, r, rl)) { | |
42684bdb | 1702 | fprintf(stderr, "\n*** udiv failed\n"); |
1703 | dumpmp(" divisor", b, bl); | |
1704 | dumpmp("expect r", r, rl); | |
1705 | dumpmp("result r", a, al); | |
1706 | dumpmp("expect q", q, ql); | |
1707 | dumpmp("result q", qq, qql); | |
1708 | ok = 0; | |
1709 | } | |
1710 | ||
12ed8a1f | 1711 | xfree(a); xfree(b); xfree(r); xfree(q); xfree(s); xfree(qq); |
42684bdb | 1712 | return (ok); |
1713 | } | |
1714 | ||
1715 | static test_chunk defs[] = { | |
1716 | { "load-store", loadstore, { &type_hex, 0 } }, | |
f09e814a | 1717 | { "2cl", twocl, { &type_hex, &type_hex, } }, |
1718 | { "2cb", twocb, { &type_hex, &type_hex, } }, | |
42684bdb | 1719 | { "lsl", lsl, { &type_hex, &type_int, &type_hex, 0 } }, |
81578196 | 1720 | { "lslc", lslc, { &type_hex, &type_int, &type_hex, 0 } }, |
42684bdb | 1721 | { "lsr", lsr, { &type_hex, &type_int, &type_hex, 0 } }, |
1722 | { "uadd", uadd, { &type_hex, &type_hex, &type_hex, 0 } }, | |
1723 | { "usub", usub, { &type_hex, &type_hex, &type_hex, 0 } }, | |
1724 | { "umul", umul, { &type_hex, &type_hex, &type_hex, 0 } }, | |
1725 | { "usqr", usqr, { &type_hex, &type_hex, 0 } }, | |
1726 | { "udiv", udiv, { &type_hex, &type_hex, &type_hex, &type_hex, 0 } }, | |
1727 | { 0, 0, { 0 } } | |
1728 | }; | |
1729 | ||
1730 | int main(int argc, char *argv[]) | |
1731 | { | |
416b8869 MW |
1732 | #ifdef ENABLE_ASM_DEBUG |
1733 | regdump_init(); | |
1734 | #endif | |
0f00dc4c | 1735 | test_run(argc, argv, defs, SRCDIR"/t/mpx"); |
42684bdb | 1736 | return (0); |
1737 | } | |
1738 | ||
42684bdb | 1739 | #endif |
1740 | ||
d03ab969 | 1741 | /*----- That's all, folks -------------------------------------------------*/ |