blob: 9464df009343973adc4725fd12d6ae64dbb6dfe2 [file] [log] [blame]
AKASHI Takahirod4aece12020-02-21 15:12:58 +09001// SPDX-License-Identifier: GPL-2.0+ and MIT
2/*
3 * RSA library - generate parameters for a public key
4 *
5 * Copyright (c) 2019 Linaro Limited
6 * Author: AKASHI Takahiro
7 *
8 * Big number routines in this file come from BearSSL:
9 * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
10 */
11
12#include <common.h>
13#include <image.h>
14#include <malloc.h>
15#include <asm/byteorder.h>
16#include <crypto/internal/rsa.h>
17#include <u-boot/rsa-mod-exp.h>
18
19/**
20 * br_dec16be() - Convert 16-bit big-endian integer to native
21 * @src: Pointer to data
22 * Return: Native-endian integer
23 */
24static unsigned br_dec16be(const void *src)
25{
26 return be16_to_cpup(src);
27}
28
29/**
30 * br_dec32be() - Convert 32-bit big-endian integer to native
31 * @src: Pointer to data
32 * Return: Native-endian integer
33 */
34static uint32_t br_dec32be(const void *src)
35{
36 return be32_to_cpup(src);
37}
38
39/**
40 * br_enc32be() - Convert native 32-bit integer to big-endian
41 * @dst: Pointer to buffer to store big-endian integer in
42 * @x: Native 32-bit integer
43 */
44static void br_enc32be(void *dst, uint32_t x)
45{
46 __be32 tmp;
47
48 tmp = cpu_to_be32(x);
49 memcpy(dst, &tmp, sizeof(tmp));
50}
51
52/* from BearSSL's src/inner.h */
53
54/*
55 * Negate a boolean.
56 */
57static uint32_t NOT(uint32_t ctl)
58{
59 return ctl ^ 1;
60}
61
62/*
63 * Multiplexer: returns x if ctl == 1, y if ctl == 0.
64 */
65static uint32_t MUX(uint32_t ctl, uint32_t x, uint32_t y)
66{
67 return y ^ (-ctl & (x ^ y));
68}
69
70/*
71 * Equality check: returns 1 if x == y, 0 otherwise.
72 */
73static uint32_t EQ(uint32_t x, uint32_t y)
74{
75 uint32_t q;
76
77 q = x ^ y;
78 return NOT((q | -q) >> 31);
79}
80
81/*
82 * Inequality check: returns 1 if x != y, 0 otherwise.
83 */
84static uint32_t NEQ(uint32_t x, uint32_t y)
85{
86 uint32_t q;
87
88 q = x ^ y;
89 return (q | -q) >> 31;
90}
91
92/*
93 * Comparison: returns 1 if x > y, 0 otherwise.
94 */
95static uint32_t GT(uint32_t x, uint32_t y)
96{
97 /*
98 * If both x < 2^31 and y < 2^31, then y-x will have its high
99 * bit set if x > y, cleared otherwise.
100 *
101 * If either x >= 2^31 or y >= 2^31 (but not both), then the
102 * result is the high bit of x.
103 *
104 * If both x >= 2^31 and y >= 2^31, then we can virtually
105 * subtract 2^31 from both, and we are back to the first case.
106 * Since (y-2^31)-(x-2^31) = y-x, the subtraction is already
107 * fine.
108 */
109 uint32_t z;
110
111 z = y - x;
112 return (z ^ ((x ^ y) & (x ^ z))) >> 31;
113}
114
115/*
116 * Compute the bit length of a 32-bit integer. Returned value is between 0
117 * and 32 (inclusive).
118 */
119static uint32_t BIT_LENGTH(uint32_t x)
120{
121 uint32_t k, c;
122
123 k = NEQ(x, 0);
124 c = GT(x, 0xFFFF); x = MUX(c, x >> 16, x); k += c << 4;
125 c = GT(x, 0x00FF); x = MUX(c, x >> 8, x); k += c << 3;
126 c = GT(x, 0x000F); x = MUX(c, x >> 4, x); k += c << 2;
127 c = GT(x, 0x0003); x = MUX(c, x >> 2, x); k += c << 1;
128 k += GT(x, 0x0001);
129 return k;
130}
131
132#define GE(x, y) NOT(GT(y, x))
133#define LT(x, y) GT(y, x)
134#define MUL(x, y) ((uint64_t)(x) * (uint64_t)(y))
135
136/*
137 * Integers 'i32'
138 * --------------
139 *
140 * The 'i32' functions implement computations on big integers using
141 * an internal representation as an array of 32-bit integers. For
142 * an array x[]:
143 * -- x[0] contains the "announced bit length" of the integer
144 * -- x[1], x[2]... contain the value in little-endian order (x[1]
145 * contains the least significant 32 bits)
146 *
147 * Multiplications rely on the elementary 32x32->64 multiplication.
148 *
149 * The announced bit length specifies the number of bits that are
150 * significant in the subsequent 32-bit words. Unused bits in the
151 * last (most significant) word are set to 0; subsequent words are
152 * uninitialized and need not exist at all.
153 *
154 * The execution time and memory access patterns of all computations
155 * depend on the announced bit length, but not on the actual word
156 * values. For modular integers, the announced bit length of any integer
157 * modulo n is equal to the actual bit length of n; thus, computations
158 * on modular integers are "constant-time" (only the modulus length may
159 * leak).
160 */
161
162/*
163 * Extract one word from an integer. The offset is counted in bits.
164 * The word MUST entirely fit within the word elements corresponding
165 * to the announced bit length of a[].
166 */
167static uint32_t br_i32_word(const uint32_t *a, uint32_t off)
168{
169 size_t u;
170 unsigned j;
171
172 u = (size_t)(off >> 5) + 1;
173 j = (unsigned)off & 31;
174 if (j == 0) {
175 return a[u];
176 } else {
177 return (a[u] >> j) | (a[u + 1] << (32 - j));
178 }
179}
180
181/* from BearSSL's src/int/i32_bitlen.c */
182
183/*
184 * Compute the actual bit length of an integer. The argument x should
185 * point to the first (least significant) value word of the integer.
186 * The len 'xlen' contains the number of 32-bit words to access.
187 *
188 * CT: value or length of x does not leak.
189 */
190static uint32_t br_i32_bit_length(uint32_t *x, size_t xlen)
191{
192 uint32_t tw, twk;
193
194 tw = 0;
195 twk = 0;
196 while (xlen -- > 0) {
197 uint32_t w, c;
198
199 c = EQ(tw, 0);
200 w = x[xlen];
201 tw = MUX(c, w, tw);
202 twk = MUX(c, (uint32_t)xlen, twk);
203 }
204 return (twk << 5) + BIT_LENGTH(tw);
205}
206
207/* from BearSSL's src/int/i32_decode.c */
208
209/*
210 * Decode an integer from its big-endian unsigned representation. The
211 * "true" bit length of the integer is computed, but all words of x[]
212 * corresponding to the full 'len' bytes of the source are set.
213 *
214 * CT: value or length of x does not leak.
215 */
216static void br_i32_decode(uint32_t *x, const void *src, size_t len)
217{
218 const unsigned char *buf;
219 size_t u, v;
220
221 buf = src;
222 u = len;
223 v = 1;
224 for (;;) {
225 if (u < 4) {
226 uint32_t w;
227
228 if (u < 2) {
229 if (u == 0) {
230 break;
231 } else {
232 w = buf[0];
233 }
234 } else {
235 if (u == 2) {
236 w = br_dec16be(buf);
237 } else {
238 w = ((uint32_t)buf[0] << 16)
239 | br_dec16be(buf + 1);
240 }
241 }
242 x[v ++] = w;
243 break;
244 } else {
245 u -= 4;
246 x[v ++] = br_dec32be(buf + u);
247 }
248 }
249 x[0] = br_i32_bit_length(x + 1, v - 1);
250}
251
252/* from BearSSL's src/int/i32_encode.c */
253
254/*
255 * Encode an integer into its big-endian unsigned representation. The
256 * output length in bytes is provided (parameter 'len'); if the length
257 * is too short then the integer is appropriately truncated; if it is
258 * too long then the extra bytes are set to 0.
259 */
260static void br_i32_encode(void *dst, size_t len, const uint32_t *x)
261{
262 unsigned char *buf;
263 size_t k;
264
265 buf = dst;
266
267 /*
268 * Compute the announced size of x in bytes; extra bytes are
269 * filled with zeros.
270 */
271 k = (x[0] + 7) >> 3;
272 while (len > k) {
273 *buf ++ = 0;
274 len --;
275 }
276
277 /*
278 * Now we use k as index within x[]. That index starts at 1;
279 * we initialize it to the topmost complete word, and process
280 * any remaining incomplete word.
281 */
282 k = (len + 3) >> 2;
283 switch (len & 3) {
284 case 3:
285 *buf ++ = x[k] >> 16;
286 /* fall through */
287 case 2:
288 *buf ++ = x[k] >> 8;
289 /* fall through */
290 case 1:
291 *buf ++ = x[k];
292 k --;
293 }
294
295 /*
296 * Encode all complete words.
297 */
298 while (k > 0) {
299 br_enc32be(buf, x[k]);
300 k --;
301 buf += 4;
302 }
303}
304
305/* from BearSSL's src/int/i32_ninv32.c */
306
307/*
308 * Compute -(1/x) mod 2^32. If x is even, then this function returns 0.
309 */
310static uint32_t br_i32_ninv32(uint32_t x)
311{
312 uint32_t y;
313
314 y = 2 - x;
315 y *= 2 - y * x;
316 y *= 2 - y * x;
317 y *= 2 - y * x;
318 y *= 2 - y * x;
319 return MUX(x & 1, -y, 0);
320}
321
322/* from BearSSL's src/int/i32_add.c */
323
324/*
325 * Add b[] to a[] and return the carry (0 or 1). If ctl is 0, then a[]
326 * is unmodified, but the carry is still computed and returned. The
327 * arrays a[] and b[] MUST have the same announced bit length.
328 *
329 * a[] and b[] MAY be the same array, but partial overlap is not allowed.
330 */
331static uint32_t br_i32_add(uint32_t *a, const uint32_t *b, uint32_t ctl)
332{
333 uint32_t cc;
334 size_t u, m;
335
336 cc = 0;
337 m = (a[0] + 63) >> 5;
338 for (u = 1; u < m; u ++) {
339 uint32_t aw, bw, naw;
340
341 aw = a[u];
342 bw = b[u];
343 naw = aw + bw + cc;
344
345 /*
346 * Carry is 1 if naw < aw. Carry is also 1 if naw == aw
347 * AND the carry was already 1.
348 */
349 cc = (cc & EQ(naw, aw)) | LT(naw, aw);
350 a[u] = MUX(ctl, naw, aw);
351 }
352 return cc;
353}
354
355/* from BearSSL's src/int/i32_sub.c */
356
357/*
358 * Subtract b[] from a[] and return the carry (0 or 1). If ctl is 0,
359 * then a[] is unmodified, but the carry is still computed and returned.
360 * The arrays a[] and b[] MUST have the same announced bit length.
361 *
362 * a[] and b[] MAY be the same array, but partial overlap is not allowed.
363 */
364static uint32_t br_i32_sub(uint32_t *a, const uint32_t *b, uint32_t ctl)
365{
366 uint32_t cc;
367 size_t u, m;
368
369 cc = 0;
370 m = (a[0] + 63) >> 5;
371 for (u = 1; u < m; u ++) {
372 uint32_t aw, bw, naw;
373
374 aw = a[u];
375 bw = b[u];
376 naw = aw - bw - cc;
377
378 /*
379 * Carry is 1 if naw > aw. Carry is 1 also if naw == aw
380 * AND the carry was already 1.
381 */
382 cc = (cc & EQ(naw, aw)) | GT(naw, aw);
383 a[u] = MUX(ctl, naw, aw);
384 }
385 return cc;
386}
387
388/* from BearSSL's src/int/i32_div32.c */
389
390/*
391 * Constant-time division. The dividend hi:lo is divided by the
392 * divisor d; the quotient is returned and the remainder is written
393 * in *r. If hi == d, then the quotient does not fit on 32 bits;
394 * returned value is thus truncated. If hi > d, returned values are
395 * indeterminate.
396 */
397static uint32_t br_divrem(uint32_t hi, uint32_t lo, uint32_t d, uint32_t *r)
398{
399 /* TODO: optimize this */
400 uint32_t q;
401 uint32_t ch, cf;
402 int k;
403
404 q = 0;
405 ch = EQ(hi, d);
406 hi = MUX(ch, 0, hi);
407 for (k = 31; k > 0; k --) {
408 int j;
409 uint32_t w, ctl, hi2, lo2;
410
411 j = 32 - k;
412 w = (hi << j) | (lo >> k);
413 ctl = GE(w, d) | (hi >> k);
414 hi2 = (w - d) >> j;
415 lo2 = lo - (d << k);
416 hi = MUX(ctl, hi2, hi);
417 lo = MUX(ctl, lo2, lo);
418 q |= ctl << k;
419 }
420 cf = GE(lo, d) | hi;
421 q |= cf;
422 *r = MUX(cf, lo - d, lo);
423 return q;
424}
425
426/*
427 * Wrapper for br_divrem(); the remainder is returned, and the quotient
428 * is discarded.
429 */
430static uint32_t br_rem(uint32_t hi, uint32_t lo, uint32_t d)
431{
432 uint32_t r;
433
434 br_divrem(hi, lo, d, &r);
435 return r;
436}
437
438/*
439 * Wrapper for br_divrem(); the quotient is returned, and the remainder
440 * is discarded.
441 */
442static uint32_t br_div(uint32_t hi, uint32_t lo, uint32_t d)
443{
444 uint32_t r;
445
446 return br_divrem(hi, lo, d, &r);
447}
448
449/* from BearSSL's src/int/i32_muladd.c */
450
451/*
452 * Multiply x[] by 2^32 and then add integer z, modulo m[]. This
453 * function assumes that x[] and m[] have the same announced bit
454 * length, and the announced bit length of m[] matches its true
455 * bit length.
456 *
457 * x[] and m[] MUST be distinct arrays.
458 *
459 * CT: only the common announced bit length of x and m leaks, not
460 * the values of x, z or m.
461 */
462static void br_i32_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m)
463{
464 uint32_t m_bitlen;
465 size_t u, mlen;
466 uint32_t a0, a1, b0, hi, g, q, tb;
467 uint32_t chf, clow, under, over;
468 uint64_t cc;
469
470 /*
471 * We can test on the modulus bit length since we accept to
472 * leak that length.
473 */
474 m_bitlen = m[0];
475 if (m_bitlen == 0) {
476 return;
477 }
478 if (m_bitlen <= 32) {
479 x[1] = br_rem(x[1], z, m[1]);
480 return;
481 }
482 mlen = (m_bitlen + 31) >> 5;
483
484 /*
485 * Principle: we estimate the quotient (x*2^32+z)/m by
486 * doing a 64/32 division with the high words.
487 *
488 * Let:
489 * w = 2^32
490 * a = (w*a0 + a1) * w^N + a2
491 * b = b0 * w^N + b2
492 * such that:
493 * 0 <= a0 < w
494 * 0 <= a1 < w
495 * 0 <= a2 < w^N
496 * w/2 <= b0 < w
497 * 0 <= b2 < w^N
498 * a < w*b
499 * I.e. the two top words of a are a0:a1, the top word of b is
500 * b0, we ensured that b0 is "full" (high bit set), and a is
501 * such that the quotient q = a/b fits on one word (0 <= q < w).
502 *
503 * If a = b*q + r (with 0 <= r < q), we can estimate q by
504 * doing an Euclidean division on the top words:
505 * a0*w+a1 = b0*u + v (with 0 <= v < w)
506 * Then the following holds:
507 * 0 <= u <= w
508 * u-2 <= q <= u
509 */
510 a0 = br_i32_word(x, m_bitlen - 32);
511 hi = x[mlen];
512 memmove(x + 2, x + 1, (mlen - 1) * sizeof *x);
513 x[1] = z;
514 a1 = br_i32_word(x, m_bitlen - 32);
515 b0 = br_i32_word(m, m_bitlen - 32);
516
517 /*
518 * We estimate a divisor q. If the quotient returned by br_div()
519 * is g:
520 * -- If a0 == b0 then g == 0; we want q = 0xFFFFFFFF.
521 * -- Otherwise:
522 * -- if g == 0 then we set q = 0;
523 * -- otherwise, we set q = g - 1.
524 * The properties described above then ensure that the true
525 * quotient is q-1, q or q+1.
526 */
527 g = br_div(a0, a1, b0);
528 q = MUX(EQ(a0, b0), 0xFFFFFFFF, MUX(EQ(g, 0), 0, g - 1));
529
530 /*
531 * We subtract q*m from x (with the extra high word of value 'hi').
532 * Since q may be off by 1 (in either direction), we may have to
533 * add or subtract m afterwards.
534 *
535 * The 'tb' flag will be true (1) at the end of the loop if the
536 * result is greater than or equal to the modulus (not counting
537 * 'hi' or the carry).
538 */
539 cc = 0;
540 tb = 1;
541 for (u = 1; u <= mlen; u ++) {
542 uint32_t mw, zw, xw, nxw;
543 uint64_t zl;
544
545 mw = m[u];
546 zl = MUL(mw, q) + cc;
547 cc = (uint32_t)(zl >> 32);
548 zw = (uint32_t)zl;
549 xw = x[u];
550 nxw = xw - zw;
551 cc += (uint64_t)GT(nxw, xw);
552 x[u] = nxw;
553 tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw));
554 }
555
556 /*
557 * If we underestimated q, then either cc < hi (one extra bit
558 * beyond the top array word), or cc == hi and tb is true (no
559 * extra bit, but the result is not lower than the modulus). In
560 * these cases we must subtract m once.
561 *
562 * Otherwise, we may have overestimated, which will show as
563 * cc > hi (thus a negative result). Correction is adding m once.
564 */
565 chf = (uint32_t)(cc >> 32);
566 clow = (uint32_t)cc;
567 over = chf | GT(clow, hi);
568 under = ~over & (tb | (~chf & LT(clow, hi)));
569 br_i32_add(x, m, over);
570 br_i32_sub(x, m, under);
571}
572
573/* from BearSSL's src/int/i32_reduce.c */
574
575/*
576 * Reduce an integer (a[]) modulo another (m[]). The result is written
577 * in x[] and its announced bit length is set to be equal to that of m[].
578 *
579 * x[] MUST be distinct from a[] and m[].
580 *
581 * CT: only announced bit lengths leak, not values of x, a or m.
582 */
583static void br_i32_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m)
584{
585 uint32_t m_bitlen, a_bitlen;
586 size_t mlen, alen, u;
587
588 m_bitlen = m[0];
589 mlen = (m_bitlen + 31) >> 5;
590
591 x[0] = m_bitlen;
592 if (m_bitlen == 0) {
593 return;
594 }
595
596 /*
597 * If the source is shorter, then simply copy all words from a[]
598 * and zero out the upper words.
599 */
600 a_bitlen = a[0];
601 alen = (a_bitlen + 31) >> 5;
602 if (a_bitlen < m_bitlen) {
603 memcpy(x + 1, a + 1, alen * sizeof *a);
604 for (u = alen; u < mlen; u ++) {
605 x[u + 1] = 0;
606 }
607 return;
608 }
609
610 /*
611 * The source length is at least equal to that of the modulus.
612 * We must thus copy N-1 words, and input the remaining words
613 * one by one.
614 */
615 memcpy(x + 1, a + 2 + (alen - mlen), (mlen - 1) * sizeof *a);
616 x[mlen] = 0;
617 for (u = 1 + alen - mlen; u > 0; u --) {
618 br_i32_muladd_small(x, a[u], m);
619 }
620}
621
622/**
623 * rsa_free_key_prop() - Free key properties
624 * @prop: Pointer to struct key_prop
625 *
626 * This function frees all the memories allocated by rsa_gen_key_prop().
627 */
628void rsa_free_key_prop(struct key_prop *prop)
629{
630 if (!prop)
631 return;
632
633 free((void *)prop->modulus);
634 free((void *)prop->public_exponent);
635 free((void *)prop->rr);
636
637 free(prop);
638}
639
640/**
641 * rsa_gen_key_prop() - Generate key properties of RSA public key
642 * @key: Specifies key data in DER format
643 * @keylen: Length of @key
644 * @prop: Generated key property
645 *
646 * This function takes a blob of encoded RSA public key data in DER
647 * format, parse it and generate all the relevant properties
648 * in key_prop structure.
649 * Return a pointer to struct key_prop in @prop on success.
650 *
651 * Return: 0 on success, negative on error
652 */
653int rsa_gen_key_prop(const void *key, uint32_t keylen, struct key_prop **prop)
654{
655 struct rsa_key rsa_key;
656 uint32_t *n = NULL, *rr = NULL, *rrtmp = NULL;
657 const int max_rsa_size = 4096;
658 int rlen, i, ret;
659
660 *prop = calloc(sizeof(**prop), 1);
661 n = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5));
662 rr = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5));
663 rrtmp = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5));
664 if (!(*prop) || !n || !rr || !rrtmp) {
665 ret = -ENOMEM;
666 goto err;
667 }
668
669 ret = rsa_parse_pub_key(&rsa_key, key, keylen);
670 if (ret)
671 goto err;
672
673 /* modulus */
674 /* removing leading 0's */
675 for (i = 0; i < rsa_key.n_sz && !rsa_key.n[i]; i++)
676 ;
677 (*prop)->num_bits = (rsa_key.n_sz - i) * 8;
678 (*prop)->modulus = malloc(rsa_key.n_sz - i);
679 if (!(*prop)->modulus) {
680 ret = -ENOMEM;
681 goto err;
682 }
683 memcpy((void *)(*prop)->modulus, &rsa_key.n[i], rsa_key.n_sz - i);
684
685 /* exponent */
686 (*prop)->public_exponent = calloc(1, sizeof(uint64_t));
687 if (!(*prop)->public_exponent) {
688 ret = -ENOMEM;
689 goto err;
690 }
691 memcpy((void *)(*prop)->public_exponent + sizeof(uint64_t)
692 - rsa_key.e_sz,
693 rsa_key.e, rsa_key.e_sz);
694 (*prop)->exp_len = rsa_key.e_sz;
695
696 /* n0 inverse */
697 br_i32_decode(n, &rsa_key.n[i], rsa_key.n_sz - i);
698 (*prop)->n0inv = br_i32_ninv32(n[1]);
699
700 /* R^2 mod n; R = 2^(num_bits) */
701 rlen = (*prop)->num_bits * 2; /* #bits of R^2 = (2^num_bits)^2 */
702 rr[0] = 0;
703 *(uint8_t *)&rr[0] = (1 << (rlen % 8));
704 for (i = 1; i < (((rlen + 31) >> 5) + 1); i++)
705 rr[i] = 0;
706 br_i32_decode(rrtmp, rr, ((rlen + 7) >> 3) + 1);
707 br_i32_reduce(rr, rrtmp, n);
708
709 rlen = ((*prop)->num_bits + 7) >> 3; /* #bytes of R^2 mod n */
710 (*prop)->rr = malloc(rlen);
711 if (!(*prop)->rr) {
712 ret = -ENOMEM;
713 goto err;
714 }
715 br_i32_encode((void *)(*prop)->rr, rlen, rr);
716
717 return 0;
718
719err:
720 free(n);
721 free(rr);
722 free(rrtmp);
723 rsa_free_key_prop(*prop);
724 return ret;
725}