[Dnsmasq-discuss] [PATCH 6/6] Use ChaCha8-based {rand16, rand32, rand64}
Leonid Evdokimov
leon at darkk.net.ru
Fri Oct 4 21:14:53 UTC 2024
surf() takes 18256 cycles to generate u32[8*2] and needs ~290 opcodes.
chacha8() either takes 4086 cycles to generate same amount of entropy
with just ~100 opcodes, or 1944 cycles with 320 opcodes depending
on QUARTERROUND() inlining.
`gcc-13.2.0 -Os -mips32r2 -mtune=24kc -mips16` gives following numbers
for MediaTek MT7620N running at 580MHz:
rand16: 1.85 -> 0.294 us/call, 6.3x speedup
rand32: 1.85 -> 0.520 us/call, 3.5x speedup
rand64: 3.60 -> 0.959 us/call, 3.7x speedup
---
Makefile | 1 +
src/charand.c | 193 ++++++++++++++++++++++++++++++++++++++++++++++++++
src/charand.h | 42 +++++++++++
src/dnsmasq.h | 9 ++-
src/util.c | 117 +++---------------------------
5 files changed, 250 insertions(+), 112 deletions(-)
create mode 100644 src/charand.c
create mode 100644 src/charand.h
diff --git Makefile Makefile
index a9bcab90..b78f03ae 100644
--- Makefile
+++ Makefile
@@ -85,6 +85,7 @@ objs = cache.o rfc1035.o util.o option.o forward.o network.o \
dhcp-common.o outpacket.o radv.o slaac.o auth.o ipset.o pattern.o \
domain.o dnssec.o blockdata.o tables.o loop.o inotify.o \
poll.o rrfilter.o edns0.o arp.o crypto.o dump.o ubus.o \
+ charand.o \
metrics.o hash-questions.o domain-match.o nftset.o
hdrs = dnsmasq.h config.h dhcp-protocol.h dhcp6-protocol.h \
diff --git src/charand.c src/charand.c
new file mode 100644
index 00000000..a2eed576
--- /dev/null
+++ src/charand.c
@@ -0,0 +1,193 @@
+/* dnsmasq is Copyright (c) 2000-2024 Simon Kelley
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 dated June, 1991, or
+ (at your option) version 3 dated 29 June, 2007.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+#include "config.h"
+#include "charand.h"
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <limits.h>
+#include <string.h>
+#include <time.h>
+#include <sys/auxv.h>
+#if !defined(HAVE_GETENTROPY) && defined(RANDFILE)
+# include <stdio.h>
+#endif
+
+// ChaCha8-based random number generator. That's NOT Golang's chacha8rand
+// described at https://github.com/C2SP/C2SP/blob/main/chacha8rand.md
+// as the most popular OpenWRT boxes are not that rich to have SIMD :-)
+
+// Non-standard libc getentropy() might use getrandom() avoiding filesystem access, that's great
+// for jails and chroots. However, a fallback implemetation is required for older systems that have
+// no getentropy() in libc. Also, getentropy() might block if the kernel has not initialized random
+// pool yet. However, dnsmasq is never started that early during the OpenWRT boot process (at least).
+
+#if !defined(HAVE_GETENTROPY) && defined(RANDFILE)
+
+static int getentropy_fallback(void *buffer, size_t length)
+{
+ FILE *fd = fopen(RANDFILE, "r")
+ if (!fd)
+ return -1;
+ const size_t done = fread(fd, buffer, 1, length);
+ fclose(fd);
+ return done == length ? 0 : -1;
+}
+
+#define getentropy(a, b) getentropy_fallback(a, b)
+
+#endif // !defined(HAVE_GETENTROPY)
+
+static void chacha8(void);
+
+static uint32_t CHA[16] = { 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 };
+static uint32_t CHAO[16];
+static unsigned int CHAOBYEs = 0;
+
+static void nonce_time(void) { CHA[14] = (uint32_t)time(NULL); };
+static void nonce_proc(void) { CHA[15] = getpid(); };
+
+int charand_init(void)
+{
+ if (getentropy(CHA + 4, 8 * sizeof(uint32_t)))
+ return -1;
+ // Overflowing 32-bit counter is trivial, overflowing 64-bit counter takes 146 years at 4 GHz rate.
+ // So, counter is never reset during the process lifetime.
+ CHA[12] = CHA[13] = 0;
+ nonce_time();
+ nonce_proc();
+ chacha8();
+ return 0;
+}
+
+bool charand_isinit(void)
+{
+ return CHA[15] != 0;
+}
+
+void charand_rekey(void)
+{
+ if (CHAOBYEs < 32)
+ chacha8();
+ for (int i = 0; i < 8; i++)
+ CHA[4+i] ^= CHAO[i];
+ nonce_time();
+ chacha8();
+}
+
+// This atfork handler avoids getentropy() call to preserve system entropy pool.
+void charand_atfork_child(void)
+{
+ const uint32_t *r32x4 =
+#ifdef AT_RANDOM
+ (const uint32_t*)getauxval(AT_RANDOM); // Use the entropy OS provides. Hopefully, ptr is aligned :-)
+#else
+# warning No getauxval(AT_RANDOM) available
+ NULL;
+#endif
+ if (r32x4)
+ for (unsigned int i = 0; i < 16 / sizeof(uint32_t); i++)
+ CHA[4+i] ^= r32x4[i]; // scramble 256-bit key with 128-bit process-wide random
+ nonce_proc();
+ chacha8();
+ charand_rekey();
+}
+
+static inline uint32_t rotl32(uint32_t x, unsigned b) { return (x << b) | (x >> (32 - b)); }
+
+#define QUARTERROUND(a, b, c, d) do { \
+ CHAO[a] += CHAO[b]; CHAO[d] ^= CHAO[a]; CHAO[d] = rotl32(CHAO[d], 16); \
+ CHAO[c] += CHAO[d]; CHAO[b] ^= CHAO[c]; CHAO[b] = rotl32(CHAO[b], 12); \
+ CHAO[a] += CHAO[b]; CHAO[d] ^= CHAO[a]; CHAO[d] = rotl32(CHAO[d], 8); \
+ CHAO[c] += CHAO[d]; CHAO[b] ^= CHAO[c]; CHAO[b] = rotl32(CHAO[b], 7); \
+} while (0)
+
+// These are two options for the ChaCha8 quarter-round implementation of different size and speed.
+// There is also a obvious middleground:
+// static void chaqround(int a, int b, int c, int d) { QUARTERROUND(a, b, c, d); }
+// But it takes 3654 cycles and 130 opcodes, so it's kinda pointless.
+
+#ifdef __OPTIMIZE_SIZE__
+
+// 4086 cycles, 100 opcodes for `gcc-13.2.0 -Os -mips32r2 -mtune=24kc -mips16`
+# define quarterround(a, b, c, d) quarterround_1arg((a) | ((b) << 4) | ((c) << 8) | ((d) << 12))
+
+static void quarterround_1arg(unsigned ndx)
+{
+ const unsigned a = 0x0F & ndx;
+ const unsigned b = 0x0F & (ndx >> 4);
+ const unsigned c = 0x0F & (ndx >> 8);
+ const unsigned d = ndx >> 12;
+ QUARTERROUND(a, b, c, d);
+}
+
+#else
+
+// 1944 cycles, 320 opcodes for the same MIPS32r2 machine
+# define quarterround(a, b, c, d) QUARTERROUND(a, b, c, d)
+
+#endif // __OPTIMIZE_SIZE__
+
+static void chacha8(void)
+{
+ CHA[12]++;
+ CHA[13] += !CHA[12];
+
+ memcpy(CHAO, CHA, sizeof(CHAO));
+
+ for (unsigned i = 4; i; i--) {
+ quarterround(0, 4, 8, 12);
+ quarterround(1, 5, 9, 13);
+ quarterround(2, 6, 10, 14);
+ quarterround(3, 7, 11, 15);
+ quarterround(0, 5, 10, 15);
+ quarterround(1, 6, 11, 12);
+ quarterround(2, 7, 8, 13);
+ quarterround(3, 4, 9, 14);
+ }
+
+ for (unsigned i = 0; i < 16; i++)
+ CHAO[i] += CHA[i];
+
+ CHAOBYEs = sizeof(CHAO);
+}
+
+// rand16() is the most popular in rand*() functions family, it is called once per ≈ every DNS request,
+// so it should not waste generated entropy.
+// TODO: count real-world stats for an OpenWRT box.
+uint16_t charand16(void)
+{
+ // CHAOBYEs is always aligned at uint16_t boundary
+ if (!CHAOBYEs) chacha8();
+ CHAOBYEs -= sizeof(uint16_t);
+ return *(uint16_t*)(((uint8_t*)CHAO) + CHAOBYEs);
+}
+
+uint32_t charand32(void)
+{
+ CHAOBYEs &= (UINT_MAX - 3); // skip some to make read aligned
+ if (!CHAOBYEs) chacha8();
+ CHAOBYEs -= sizeof(uint32_t);
+ return *(uint32_t*)(((uint8_t*)CHAO) + CHAOBYEs);
+}
+
+uint64_t charand64(void)
+{
+ CHAOBYEs &= (UINT_MAX - 7); // skip some to make read aligned
+ if (!CHAOBYEs) chacha8();
+ CHAOBYEs -= sizeof(uint64_t);
+ return *(uint64_t*)(((uint8_t*)CHAO) + CHAOBYEs);
+}
diff --git src/charand.h src/charand.h
new file mode 100644
index 00000000..a831426a
--- /dev/null
+++ src/charand.h
@@ -0,0 +1,42 @@
+/* dnsmasq is Copyright (c) 2000-2024 Simon Kelley
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 dated June, 1991, or
+ (at your option) version 3 dated 29 June, 2007.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef CHARAND_H_C3BD69196178
+#define CHARAND_H_C3BD69196178
+
+#if !defined(HAVE_GETENTROPY) && !defined(RANDFILE)
+# define RANDFILE "/dev/urandom"
+#endif
+
+#include <stdint.h>
+#include <stdbool.h>
+
+int charand_init(void);
+bool charand_isinit(void);
+void charand_rekey(void);
+void charand_atfork_child(void);
+
+uint16_t charand16(void);
+uint32_t charand32(void);
+uint64_t charand64(void);
+
+static inline uintptr_t charandptr(void)
+{
+ const unsigned szp = sizeof(void*);
+ _Static_assert(szp == 4 || szp == 8, "void* is neither 32-bit nor 64-bit");
+ return szp == 4 ? charand32() : charand64();
+}
+
+#endif // CHARAND_H_C3BD69196178
diff --git src/dnsmasq.h src/dnsmasq.h
index 65c458e9..6421280d 100644
--- src/dnsmasq.h
+++ src/dnsmasq.h
@@ -1450,11 +1450,14 @@ char *ds_digest_name(int digest);
char *algo_digest_name(int algo);
char *nsec3_digest_name(int digest);
+#include "charand.h"
+
/* util.c */
void rand_init(void);
-unsigned short rand16(void);
-u32 rand32(void);
-u64 rand64(void);
+static inline unsigned short rand16(void) { return charand16(); }
+static inline u32 rand32(void) { return charand32(); }
+static inline u64 rand64(void) { return charand64(); }
+static inline uintptr_t randptr(void) { return charandptr(); }
unsigned int should_reseed(time_t last, time_t now);
int rr_on_list(struct rrlist *list, unsigned short rr);
int legal_hostname(char *name);
diff --git src/util.c src/util.c
index 1631328c..63a1bcfd 100644
--- src/util.c
+++ src/util.c
@@ -14,12 +14,8 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-/* The SURF random number generator was taken from djbdns-1.05, by
- Daniel J Bernstein, which is public domain. */
-
#include "dnsmasq.h"
-#include <stdbool.h>
#ifdef HAVE_BROKEN_RTC
#include <sys/times.h>
@@ -35,111 +31,14 @@
#include <sys/utsname.h>
#endif
-#ifndef HAVE_GETENTROPY
-// Non-standard libc getentropy() might use getrandom() avoiding filesystem access, that's great
-// for jails and chroots. However, a fallback implemetation is required for older systems that have
-// no getentropy() in libc. Also, getentropy() might block if the kernel has not initialized random
-// pool yet. However, dnsmasq is never started that early during the OpenWRT boot process (at least).
-#define getentropy(a, b) getentropy_fallback(a, b)
-static int getentropy_fallback(void *buffer, size_t length)
-{
- const int fd = open(RANDFILE, O_RDONLY);
- if (fd == -1)
- return -1;
- const int okay = read_write(fd, buffer, length, 1);
- close(fd);
- return okay ? 0 : -1;
-}
-#endif // HAVE_GETENTROPY
-
-/* SURF random number generator */
-
-static struct surf_state {
- u32 seed[32];
- u32 in[12];
-} surfst;
-static u32 out[8];
-static int outleft = 0;
-
void rand_init()
{
- const u32 * const in = surfst.in;
- const bool reseed = in[0] || in[1] || in[2] || in[3] || in[4] || in[5] ||
- in[6] || in[7] || in[8] || in[9] || in[10] || in[11];
- struct surf_state next;
- const unsigned bytes = reseed ? sizeof(next.seed) : sizeof(next);
- if (getentropy(&next, bytes) != 0)
- {
- if (!reseed)
- die(_("failed to seed the random number generator: %s"), NULL, EC_MISC);
- else
- my_syslog(LOG_ERR, _("failed to reseed the random number generator: %s"), strerror(errno));
- }
- _Static_assert(surfst.in == surfst.seed + countof(surfst.seed)); // No weird alignment gaps.
- for (unsigned int i = 0; i < bytes / sizeof(next.seed[0]); i++)
- surfst.seed[i] ^= next.seed[i];
-}
-
-static void rand_atfork(pid_t fpid)
-{
- struct surf_state next;
- for (unsigned int i = 0; i < countof(next.seed); i++)
- next.seed[i] = rand32();
- // Child reseeds the state with generated values. Parent skips those values explicitly as they
- // might be exposed to an external observer and used to guess something about PRNG of the child.
- if (fpid == 0)
- for (unsigned int i = 0; i < countof(next.seed); i++)
- surfst.seed[i] ^= next.seed[i];
-}
-
-#define ROTATE(x,b) (((x) << (b)) | ((x) >> (32 - (b))))
-#define MUSH(i,b) x = t[i] += (((x ^ seed[i]) + sum) ^ ROTATE(x,b));
-
-static void surf(void)
-{
- u32 * const in = surfst.in;
- const u32 * const seed = surfst.seed;
- u32 t[12]; u32 x; u32 sum = 0;
- int r; int i; int loop;
-
- // Overflowing 32-bit counter is trivial, overflowing 64-bit counter takes 146 years
- // at 4 GHz rate. So let counter be 64-bit as 128-bit counter is excessive.
- in[0]++; in[1] += !in[0];
-
- for (i = 0;i < 12;++i) t[i] = in[i] ^ seed[12 + i];
- for (i = 0;i < 8;++i) out[i] = seed[24 + i];
- x = t[11];
- for (loop = 0;loop < 2;++loop) {
- for (r = 0;r < 16;++r) {
- sum += 0x9e3779b9;
- MUSH(0,5) MUSH(1,7) MUSH(2,9) MUSH(3,13)
- MUSH(4,5) MUSH(5,7) MUSH(6,9) MUSH(7,13)
- MUSH(8,5) MUSH(9,7) MUSH(10,9) MUSH(11,13)
- }
- for (i = 0;i < 8;++i) out[i] ^= t[i + 4];
- }
-
- outleft = 8;
-}
-
-unsigned short rand16(void)
-{
- return (unsigned short)rand32();
-}
-
-u32 rand32(void)
-{
- if (!outleft)
- surf();
- return out[--outleft];
-}
-
-u64 rand64(void)
-{
- if (outleft < 2)
- surf();
- outleft -= 2;
- return (u64)out[outleft+1] + (((u64)out[outleft]) << 32);
+ if (charand_init() == 0)
+ return;
+ else if (charand_isinit())
+ my_syslog(LOG_ERR, _("failed to reseed the random number generator: %s"), strerror(errno));
+ else
+ die(_("failed to seed the random number generator: %s"), NULL, EC_MISC);
}
// Reseeding hourly to avoid low-entropy condition right after boot that is somewhat possible
@@ -379,8 +278,8 @@ void safe_pipe(int *fd, int read_noblock)
pid_t my_fork()
{
const pid_t fpid = fork();
- if (fpid != -1)
- rand_atfork(fpid);
+ if (fpid == 0)
+ charand_atfork_child();
return fpid;
}
--
2.34.1
More information about the Dnsmasq-discuss
mailing list