aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/gf128mul.c')
-rw-r--r--crypto/gf128mul.c171
1 files changed, 136 insertions, 35 deletions
diff --git a/crypto/gf128mul.c b/crypto/gf128mul.c
index 5276607c72d0..f3d9f6da0767 100644
--- a/crypto/gf128mul.c
+++ b/crypto/gf128mul.c
@@ -44,7 +44,7 @@
44 --------------------------------------------------------------------------- 44 ---------------------------------------------------------------------------
45 Issue 31/01/2006 45 Issue 31/01/2006
46 46
47 This file provides fast multiplication in GF(128) as required by several 47 This file provides fast multiplication in GF(2^128) as required by several
48 cryptographic authentication modes 48 cryptographic authentication modes
49*/ 49*/
50 50
@@ -88,37 +88,52 @@
88 q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \ 88 q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
89} 89}
90 90
91/* Given the value i in 0..255 as the byte overflow when a field element 91/*
92 in GHASH is multiplied by x^8, this function will return the values that 92 * Given a value i in 0..255 as the byte overflow when a field element
93 are generated in the lo 16-bit word of the field value by applying the 93 * in GF(2^128) is multiplied by x^8, the following macro returns the
94 modular polynomial. The values lo_byte and hi_byte are returned via the 94 * 16-bit value that must be XOR-ed into the low-degree end of the
95 macro xp_fun(lo_byte, hi_byte) so that the values can be assembled into 95 * product to reduce it modulo the irreducible polynomial x^128 + x^7 +
96 memory as required by a suitable definition of this macro operating on 96 * x^2 + x + 1.
97 the table above 97 *
98*/ 98 * There are two versions of the macro, and hence two tables: one for
99 99 * the "be" convention where the highest-order bit is the coefficient of
100#define xx(p, q) 0x##p##q 100 * the highest-degree polynomial term, and one for the "le" convention
101 * where the highest-order bit is the coefficient of the lowest-degree
102 * polynomial term. In both cases the values are stored in CPU byte
103 * endianness such that the coefficients are ordered consistently across
104 * bytes, i.e. in the "be" table bits 15..0 of the stored value
105 * correspond to the coefficients of x^15..x^0, and in the "le" table
106 * bits 15..0 correspond to the coefficients of x^0..x^15.
107 *
108 * Therefore, provided that the appropriate byte endianness conversions
109 * are done by the multiplication functions (and these must be in place
110 * anyway to support both little endian and big endian CPUs), the "be"
111 * table can be used for multiplications of both "bbe" and "ble"
112 * elements, and the "le" table can be used for multiplications of both
113 * "lle" and "lbe" elements.
114 */
101 115
102#define xda_bbe(i) ( \ 116#define xda_be(i) ( \
103 (i & 0x80 ? xx(43, 80) : 0) ^ (i & 0x40 ? xx(21, c0) : 0) ^ \ 117 (i & 0x80 ? 0x4380 : 0) ^ (i & 0x40 ? 0x21c0 : 0) ^ \
104 (i & 0x20 ? xx(10, e0) : 0) ^ (i & 0x10 ? xx(08, 70) : 0) ^ \ 118 (i & 0x20 ? 0x10e0 : 0) ^ (i & 0x10 ? 0x0870 : 0) ^ \
105 (i & 0x08 ? xx(04, 38) : 0) ^ (i & 0x04 ? xx(02, 1c) : 0) ^ \ 119 (i & 0x08 ? 0x0438 : 0) ^ (i & 0x04 ? 0x021c : 0) ^ \
106 (i & 0x02 ? xx(01, 0e) : 0) ^ (i & 0x01 ? xx(00, 87) : 0) \ 120 (i & 0x02 ? 0x010e : 0) ^ (i & 0x01 ? 0x0087 : 0) \
107) 121)
108 122
109#define xda_lle(i) ( \ 123#define xda_le(i) ( \
110 (i & 0x80 ? xx(e1, 00) : 0) ^ (i & 0x40 ? xx(70, 80) : 0) ^ \ 124 (i & 0x80 ? 0xe100 : 0) ^ (i & 0x40 ? 0x7080 : 0) ^ \
111 (i & 0x20 ? xx(38, 40) : 0) ^ (i & 0x10 ? xx(1c, 20) : 0) ^ \ 125 (i & 0x20 ? 0x3840 : 0) ^ (i & 0x10 ? 0x1c20 : 0) ^ \
112 (i & 0x08 ? xx(0e, 10) : 0) ^ (i & 0x04 ? xx(07, 08) : 0) ^ \ 126 (i & 0x08 ? 0x0e10 : 0) ^ (i & 0x04 ? 0x0708 : 0) ^ \
113 (i & 0x02 ? xx(03, 84) : 0) ^ (i & 0x01 ? xx(01, c2) : 0) \ 127 (i & 0x02 ? 0x0384 : 0) ^ (i & 0x01 ? 0x01c2 : 0) \
114) 128)
115 129
116static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle); 130static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le);
117static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe); 131static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be);
118 132
119/* These functions multiply a field element by x, by x^4 and by x^8 133/*
120 * in the polynomial field representation. It uses 32-bit word operations 134 * The following functions multiply a field element by x or by x^8 in
121 * to gain speed but compensates for machine endianess and hence works 135 * the polynomial field representation. They use 64-bit word operations
136 * to gain speed but compensate for machine endianness and hence work
122 * correctly on both styles of machine. 137 * correctly on both styles of machine.
123 */ 138 */
124 139
@@ -126,7 +141,7 @@ static void gf128mul_x_lle(be128 *r, const be128 *x)
126{ 141{
127 u64 a = be64_to_cpu(x->a); 142 u64 a = be64_to_cpu(x->a);
128 u64 b = be64_to_cpu(x->b); 143 u64 b = be64_to_cpu(x->b);
129 u64 _tt = gf128mul_table_lle[(b << 7) & 0xff]; 144 u64 _tt = gf128mul_table_le[(b << 7) & 0xff];
130 145
131 r->b = cpu_to_be64((b >> 1) | (a << 63)); 146 r->b = cpu_to_be64((b >> 1) | (a << 63));
132 r->a = cpu_to_be64((a >> 1) ^ (_tt << 48)); 147 r->a = cpu_to_be64((a >> 1) ^ (_tt << 48));
@@ -136,7 +151,7 @@ static void gf128mul_x_bbe(be128 *r, const be128 *x)
136{ 151{
137 u64 a = be64_to_cpu(x->a); 152 u64 a = be64_to_cpu(x->a);
138 u64 b = be64_to_cpu(x->b); 153 u64 b = be64_to_cpu(x->b);
139 u64 _tt = gf128mul_table_bbe[a >> 63]; 154 u64 _tt = gf128mul_table_be[a >> 63];
140 155
141 r->a = cpu_to_be64((a << 1) | (b >> 63)); 156 r->a = cpu_to_be64((a << 1) | (b >> 63));
142 r->b = cpu_to_be64((b << 1) ^ _tt); 157 r->b = cpu_to_be64((b << 1) ^ _tt);
@@ -146,7 +161,7 @@ void gf128mul_x_ble(be128 *r, const be128 *x)
146{ 161{
147 u64 a = le64_to_cpu(x->a); 162 u64 a = le64_to_cpu(x->a);
148 u64 b = le64_to_cpu(x->b); 163 u64 b = le64_to_cpu(x->b);
149 u64 _tt = gf128mul_table_bbe[b >> 63]; 164 u64 _tt = gf128mul_table_be[b >> 63];
150 165
151 r->a = cpu_to_le64((a << 1) ^ _tt); 166 r->a = cpu_to_le64((a << 1) ^ _tt);
152 r->b = cpu_to_le64((b << 1) | (a >> 63)); 167 r->b = cpu_to_le64((b << 1) | (a >> 63));
@@ -157,7 +172,7 @@ static void gf128mul_x8_lle(be128 *x)
157{ 172{
158 u64 a = be64_to_cpu(x->a); 173 u64 a = be64_to_cpu(x->a);
159 u64 b = be64_to_cpu(x->b); 174 u64 b = be64_to_cpu(x->b);
160 u64 _tt = gf128mul_table_lle[b & 0xff]; 175 u64 _tt = gf128mul_table_le[b & 0xff];
161 176
162 x->b = cpu_to_be64((b >> 8) | (a << 56)); 177 x->b = cpu_to_be64((b >> 8) | (a << 56));
163 x->a = cpu_to_be64((a >> 8) ^ (_tt << 48)); 178 x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
@@ -167,12 +182,22 @@ static void gf128mul_x8_bbe(be128 *x)
167{ 182{
168 u64 a = be64_to_cpu(x->a); 183 u64 a = be64_to_cpu(x->a);
169 u64 b = be64_to_cpu(x->b); 184 u64 b = be64_to_cpu(x->b);
170 u64 _tt = gf128mul_table_bbe[a >> 56]; 185 u64 _tt = gf128mul_table_be[a >> 56];
171 186
172 x->a = cpu_to_be64((a << 8) | (b >> 56)); 187 x->a = cpu_to_be64((a << 8) | (b >> 56));
173 x->b = cpu_to_be64((b << 8) ^ _tt); 188 x->b = cpu_to_be64((b << 8) ^ _tt);
174} 189}
175 190
191static void gf128mul_x8_ble(be128 *x)
192{
193 u64 a = le64_to_cpu(x->b);
194 u64 b = le64_to_cpu(x->a);
195 u64 _tt = gf128mul_table_be[a >> 56];
196
197 x->b = cpu_to_le64((a << 8) | (b >> 56));
198 x->a = cpu_to_le64((b << 8) ^ _tt);
199}
200
176void gf128mul_lle(be128 *r, const be128 *b) 201void gf128mul_lle(be128 *r, const be128 *b)
177{ 202{
178 be128 p[8]; 203 be128 p[8];
@@ -249,9 +274,48 @@ void gf128mul_bbe(be128 *r, const be128 *b)
249} 274}
250EXPORT_SYMBOL(gf128mul_bbe); 275EXPORT_SYMBOL(gf128mul_bbe);
251 276
277void gf128mul_ble(be128 *r, const be128 *b)
278{
279 be128 p[8];
280 int i;
281
282 p[0] = *r;
283 for (i = 0; i < 7; ++i)
284 gf128mul_x_ble((be128 *)&p[i + 1], (be128 *)&p[i]);
285
286 memset(r, 0, sizeof(*r));
287 for (i = 0;;) {
288 u8 ch = ((u8 *)b)[15 - i];
289
290 if (ch & 0x80)
291 be128_xor(r, r, &p[7]);
292 if (ch & 0x40)
293 be128_xor(r, r, &p[6]);
294 if (ch & 0x20)
295 be128_xor(r, r, &p[5]);
296 if (ch & 0x10)
297 be128_xor(r, r, &p[4]);
298 if (ch & 0x08)
299 be128_xor(r, r, &p[3]);
300 if (ch & 0x04)
301 be128_xor(r, r, &p[2]);
302 if (ch & 0x02)
303 be128_xor(r, r, &p[1]);
304 if (ch & 0x01)
305 be128_xor(r, r, &p[0]);
306
307 if (++i >= 16)
308 break;
309
310 gf128mul_x8_ble(r);
311 }
312}
313EXPORT_SYMBOL(gf128mul_ble);
314
315
252/* This version uses 64k bytes of table space. 316/* This version uses 64k bytes of table space.
253 A 16 byte buffer has to be multiplied by a 16 byte key 317 A 16 byte buffer has to be multiplied by a 16 byte key
254 value in GF(128). If we consider a GF(128) value in 318 value in GF(2^128). If we consider a GF(2^128) value in
255 the buffer's lowest byte, we can construct a table of 319 the buffer's lowest byte, we can construct a table of
256 the 256 16 byte values that result from the 256 values 320 the 256 16 byte values that result from the 256 values
257 of this byte. This requires 4096 bytes. But we also 321 of this byte. This requires 4096 bytes. But we also
@@ -352,8 +416,8 @@ void gf128mul_free_64k(struct gf128mul_64k *t)
352 int i; 416 int i;
353 417
354 for (i = 0; i < 16; i++) 418 for (i = 0; i < 16; i++)
355 kfree(t->t[i]); 419 kzfree(t->t[i]);
356 kfree(t); 420 kzfree(t);
357} 421}
358EXPORT_SYMBOL(gf128mul_free_64k); 422EXPORT_SYMBOL(gf128mul_free_64k);
359 423
@@ -385,7 +449,7 @@ EXPORT_SYMBOL(gf128mul_64k_bbe);
385 449
386/* This version uses 4k bytes of table space. 450/* This version uses 4k bytes of table space.
387 A 16 byte buffer has to be multiplied by a 16 byte key 451 A 16 byte buffer has to be multiplied by a 16 byte key
388 value in GF(128). If we consider a GF(128) value in a 452 value in GF(2^128). If we consider a GF(2^128) value in a
389 single byte, we can construct a table of the 256 16 byte 453 single byte, we can construct a table of the 256 16 byte
390 values that result from the 256 values of this byte. 454 values that result from the 256 values of this byte.
391 This requires 4096 bytes. If we take the highest byte in 455 This requires 4096 bytes. If we take the highest byte in
@@ -443,6 +507,28 @@ out:
443} 507}
444EXPORT_SYMBOL(gf128mul_init_4k_bbe); 508EXPORT_SYMBOL(gf128mul_init_4k_bbe);
445 509
510struct gf128mul_4k *gf128mul_init_4k_ble(const be128 *g)
511{
512 struct gf128mul_4k *t;
513 int j, k;
514
515 t = kzalloc(sizeof(*t), GFP_KERNEL);
516 if (!t)
517 goto out;
518
519 t->t[1] = *g;
520 for (j = 1; j <= 64; j <<= 1)
521 gf128mul_x_ble(&t->t[j + j], &t->t[j]);
522
523 for (j = 2; j < 256; j += j)
524 for (k = 1; k < j; ++k)
525 be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
526
527out:
528 return t;
529}
530EXPORT_SYMBOL(gf128mul_init_4k_ble);
531
446void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t) 532void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t)
447{ 533{
448 u8 *ap = (u8 *)a; 534 u8 *ap = (u8 *)a;
@@ -473,5 +559,20 @@ void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t)
473} 559}
474EXPORT_SYMBOL(gf128mul_4k_bbe); 560EXPORT_SYMBOL(gf128mul_4k_bbe);
475 561
562void gf128mul_4k_ble(be128 *a, struct gf128mul_4k *t)
563{
564 u8 *ap = (u8 *)a;
565 be128 r[1];
566 int i = 15;
567
568 *r = t->t[ap[15]];
569 while (i--) {
570 gf128mul_x8_ble(r);
571 be128_xor(r, r, &t->t[ap[i]]);
572 }
573 *a = *r;
574}
575EXPORT_SYMBOL(gf128mul_4k_ble);
576
476MODULE_LICENSE("GPL"); 577MODULE_LICENSE("GPL");
477MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)"); 578MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");