chiark / gitweb /
rand/rand-x86ish.S: Hoist argument register allocation outside.
[catacomb] / math / scmul.h
1 /* -*-c-*-
2  *
3  * Scalar multiplication on elliptic curves
4  *
5  * (c) 2017 Straylight/Edgeware
6  */
7
8 /*----- Licensing notice --------------------------------------------------*
9  *
10  * This file is part of Catacomb.
11  *
12  * Catacomb is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU Library General Public License as
14  * published by the Free Software Foundation; either version 2 of the
15  * License, or (at your option) any later version.
16  *
17  * Catacomb is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU Library General Public License for more details.
21  *
22  * You should have received a copy of the GNU Library General Public
23  * License along with Catacomb; if not, write to the Free
24  * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25  * MA 02111-1307, USA.
26  */
27
28 #ifndef CATACOMB_SCMUL_H
29 #define CATACOMB_SCMUL_H
30
31 #ifdef __cplusplus
32   extern "C" {
33 #endif
34
35 /*----- Macros provided ---------------------------------------------------*/
36
37 #define SCMUL_WINLIM(winwd) (1 << (winwd))
38 #define SCMUL_WINMASK(winwd) (SCMUL_WINLIM(winwd) - 1)
39 #define SCMUL_TABSZ(winwd) (SCMUL_WINLIM(winwd)/2 + 1)
40
41 #define DEFINE_SCMUL(mulfn, f, winwd, scafwd, npiece, addfn, dblfn)     \
42 void mulfn(f *X, f *Y, f *Z, const scaf_piece n[npiece],                \
43            const f *X0, const f *Y0, const f *Z0)                       \
44 {                                                                       \
45   f VX[SCMUL_TABSZ(winwd)],                                             \
46     VY[SCMUL_TABSZ(winwd)],                                             \
47     VZ[SCMUL_TABSZ(winwd)];                                             \
48   f TX, TY, TZ, UX, UY, UZ;                                             \
49   unsigned i, j, k, w;                                                  \
50   uint32 m_neg;                                                         \
51   scaf_piece ni;                                                        \
52                                                                         \
53   /* Build a table of small multiples. */                               \
54   f##_set(&VX[0], 0); f##_set(&VY[0], 1); f##_set(&VZ[0], 1);           \
55   VX[1] = *X0; VY[1] = *Y0; VZ[1] = *Z0;                                \
56   ptdbl(&VX[2], &VY[2], &VZ[2], &VX[1], &VY[1], &VZ[1]);                \
57   for (i = 3; i < SCMUL_TABSZ(winwd); i += 2) {                         \
58     addfn(&VX[i], &VY[i], &VZ[i],                                       \
59           &VX[i - 1], &VY[i - 1], &VZ[i - 1], X0, Y0, Z0);              \
60     dblfn(&VX[i + 1], &VY[i + 1], &VZ[i + 1],                           \
61           &VX[(i + 1)/2], &VY[(i + 1)/2], &VZ[(i + 1)/2]);              \
62   }                                                                     \
63                                                                         \
64   /* Now do the multiplication.  We lag a window behind the cursor      \
65    * position because of the scalar recoding we do.                     \
66    */                                                                   \
67   f##_set(&TX, 0); f##_set(&TY, 1); f##_set(&TZ, 1);                    \
68   for (i = (npiece), w = 0, m_neg = 0; i--; ) {                         \
69     ni = n[i];                                                          \
70                                                                         \
71     /* Work through each window in the scalar piece. */                 \
72     for (j = 0; j < (scafwd); j += (winwd)) {                           \
73                                                                         \
74       /* Shift along by a window. */                                    \
75       for (k = 0; k < (winwd); k++)                                     \
76         dblfn(&TX, &TY, &TZ, &TX, &TY, &TZ);                            \
77                                                                         \
78       /* Peek at the next window of four bits.  If the top bit is set   \
79        * we lend a bit leftwards, into w.  It's too late for this to    \
80        * affect the sign now, but if we negated earlier then the        \
81        * addition would be wrong.                                       \
82        */                                                               \
83       w += (ni >> ((scafwd) - 1))&0x1u;                                 \
84       w = ((SCMUL_WINLIM(winwd) - w)&m_neg) | (w&~m_neg);               \
85                                                                         \
86       /* Collect the entry from the table, and add or subtract. */      \
87       f##_pickn(&UX, VX, SCMUL_TABSZ(winwd), w);                        \
88       f##_pickn(&UY, VY, SCMUL_TABSZ(winwd), w);                        \
89       f##_pickn(&UZ, VZ, SCMUL_TABSZ(winwd), w);                        \
90       f##_condneg(&UX, &UX, m_neg);                                     \
91       addfn(&TX, &TY, &TZ, &TX, &TY, &TZ, &UX, &UY, &UZ);               \
92                                                                         \
93       /* Move the next window into the delay slot.  If its top bit is   \
94        * set, then negate it and set m_neg.                             \
95        */                                                               \
96       w = (ni >> ((scafwd) - (winwd)))&SCMUL_WINMASK(winwd);            \
97       m_neg = -(uint32)((w >> ((winwd) - 1))&0x1u);                     \
98       ni <<= (winwd);                                                   \
99     }                                                                   \
100   }                                                                     \
101                                                                         \
102   /* Do the final window.  Just fix the sign and go. */                 \
103   for (k = 0; k < (winwd); k++)                                         \
104     dblfn(&TX, &TY, &TZ, &TX, &TY, &TZ);                                \
105   w = ((SCMUL_WINLIM(winwd) - w)&m_neg) | (w&~m_neg);                   \
106   f##_pickn(&UX, VX, SCMUL_TABSZ(winwd), w);                            \
107   f##_pickn(&UY, VY, SCMUL_TABSZ(winwd), w);                            \
108   f##_pickn(&UZ, VZ, SCMUL_TABSZ(winwd), w);                            \
109   f##_condneg(&UX, &UX, m_neg);                                         \
110   addfn(X, Y, Z, &TX, &TY, &TZ, &UX, &UY, &UZ);                         \
111 }
112
113 #define SCSIMMUL_WINLIM(winwd) (1 << (winwd))
114 #define SCSIMMUL_WINMASK(winwd) (SCSIMMUL_WINLIM(winwd) - 1)
115 #define SCSIMMUL_TABSZ(winwd) (1 << 2*(winwd))
116
117 #define DEFINE_SCSIMMUL(simmulfn, f, winwd,                             \
118                         scafwd, npiece, addfn, dblfn)                   \
119 void simmulfn(f *X, f *Y, f *Z,                                         \
120               const scaf_piece n0[npiece],                              \
121               const f *X0, const f *Y0, const f *Z0,                    \
122               const scaf_piece n1[npiece],                              \
123               const f *X1, const f *Y1, const f *Z1)                    \
124 {                                                                       \
125   f VX[SCSIMMUL_TABSZ(winwd)],                                          \
126     VY[SCSIMMUL_TABSZ(winwd)],                                          \
127     VZ[SCSIMMUL_TABSZ(winwd)];                                          \
128   f TX, TY, TZ, UX, UY, UZ;                                             \
129   unsigned i, j, k, w, ni0, ni1;                                        \
130                                                                         \
131   /* Build a table of small linear combinations. */                     \
132   f##_set(&VX[0], 0); f##_set(&VY[0], 1); f##_set(&VZ[0], 1);           \
133   VX[1] = *X0; VX[SCSIMMUL_WINLIM(winwd)] = *X1;                        \
134   VY[1] = *Y0; VY[SCSIMMUL_WINLIM(winwd)] = *Y1;                        \
135   VZ[1] = *Z0; VZ[SCSIMMUL_WINLIM(winwd)] = *Z1;                        \
136   for (i = 2; i < SCSIMMUL_WINLIM(winwd); i <<= 1) {                    \
137     dblfn(&VX[i], &VY[i], &VZ[i],                                       \
138           &VX[i/2], &VY[i/2], &VZ[i/2]);                                \
139     dblfn(&VX[i*SCSIMMUL_WINLIM(winwd)],                                \
140           &VY[i*SCSIMMUL_WINLIM(winwd)],                                \
141           &VZ[i*SCSIMMUL_WINLIM(winwd)],                                \
142           &VX[i*SCSIMMUL_WINLIM(winwd)/2],                              \
143           &VY[i*SCSIMMUL_WINLIM(winwd)/2],                              \
144           &VZ[i*SCSIMMUL_WINLIM(winwd)/2]);                             \
145   }                                                                     \
146   for (i = 2; i < SCSIMMUL_TABSZ(winwd); i <<= 1) {                     \
147     for (j = 1; j < i; j++)                                             \
148       addfn(&VX[i + j], &VY[i + j], &VZ[i + j],                         \
149             &VX[i], &VY[i], &VZ[i], &VX[j], &VY[j], &VZ[j]);            \
150   }                                                                     \
151                                                                         \
152   /* Do the multiplication. */                                          \
153   f##_set(&TX, 0); f##_set(&TY, 1); f##_set(&TZ, 1);                    \
154   for (i = (npiece); i--; ) {                                           \
155     ni0 = n0[i]; ni1 = n1[i];                                           \
156                                                                         \
157     /* Work through each window in the scalar pieces. */                \
158     for (j = 0; j < (scafwd); j += (winwd)) {                           \
159                                                                         \
160       /* Shift along by a window. */                                    \
161       for (k = 0; k < (winwd); k++)                                     \
162         dblfn(&TX, &TY, &TZ, &TX, &TY, &TZ);                            \
163                                                                         \
164       /* Collect the next window from the scalars. */                   \
165       w = (((ni0 >> ((scafwd) - (winwd)))&                              \
166             SCSIMMUL_WINMASK(winwd)) |                                  \
167            ((ni1 >> ((scafwd) - 2*(winwd)))&                            \
168             (SCSIMMUL_WINMASK(winwd) << (winwd))));                     \
169       ni0 <<= (winwd); ni1 <<= (winwd);                                 \
170                                                                         \
171       /* Collect the entry from the table, and add. */                  \
172       f##_pickn(&UX, VX, SCSIMMUL_TABSZ(winwd), w);                     \
173       f##_pickn(&UY, VY, SCSIMMUL_TABSZ(winwd), w);                     \
174       f##_pickn(&UZ, VZ, SCSIMMUL_TABSZ(winwd), w);                     \
175       addfn(&TX, &TY, &TZ, &TX, &TY, &TZ, &UX, &UY, &UZ);               \
176     }                                                                   \
177   }                                                                     \
178                                                                         \
179   /* Done. */                                                           \
180   *X = TX; *Y = TY; *Z = TZ;                                            \
181 }
182
183 /*----- That's all, folks -------------------------------------------------*/
184
185 #ifdef __cplusplus
186   }
187 #endif
188
189 #endif