[Dnsmasq-discuss] [PATCH 1/4] fast lookup --server and --ipset
weichen302 at icloud.com
weichen302 at icloud.com
Sun Feb 8 10:11:19 GMT 2015
It has became a national pastime by abusing dnsmasq with thousands of --server
and --ipset rules in some part of the planet.
This patch makes dnsmasq handle it with ease.
add a new module for domain name lookup
---
Makefile | 2 +-
src/dict.c | 539 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
src/dnsmasq.c | 2 +-
src/dnsmasq.h | 47 +++++
4 files changed, 588 insertions(+), 2 deletions(-)
create mode 100644 src/dict.c
diff --git a/Makefile b/Makefile
index 2910320..d4a42ca 100644
--- a/Makefile
+++ b/Makefile
@@ -73,7 +73,7 @@ objs = cache.o rfc1035.o util.o option.o forward.o network.o \
dnsmasq.o dhcp.o lease.o rfc2131.o netlink.o dbus.o bpf.o \
helper.o tftp.o log.o conntrack.o dhcp6.o rfc3315.o \
dhcp-common.o outpacket.o radv.o slaac.o auth.o ipset.o \
- domain.o dnssec.o blockdata.o tables.o loop.o inotify.o
+ domain.o dnssec.o blockdata.o tables.o loop.o inotify.o dict.o
hdrs = dnsmasq.h config.h dhcp-protocol.h dhcp6-protocol.h \
dns-protocol.h radv-protocol.h ip6addr.h
diff --git a/src/dict.c b/src/dict.c
new file mode 100644
index 0000000..cad3463
--- /dev/null
+++ b/src/dict.c
@@ -0,0 +1,539 @@
+/* dict.c is Copyright (c) 2015 Chen Wei <weichen302 at gmail.com>
+
+ Use cascade of open addressing hash tables to store config options that
+ involve domain names.
+
+ root
+ |
+ +---------------------+
+ com org
+ | |
+ +------------------+ +-------------+
+ yahoo google twitter debian freebsd
+ | | | |
+ www mail +---------+ www
+ cn jp uk us
+ |
+ ftp
+
+ The lookup steps over domain name hierarchy top-down. All labels are stored
+ in open addressing hash tables. Sub-level labels that belong to different
+ parent nodes are stored separately. e.g. yahoo, google, and twitter are in
+ one hash table, while debian and freebsd are in another.
+
+ The hash table size is power of 2, two hash functions are used to compute
+ hash bucket. For locating a particular label from hash table, two hash
+ values are compared first, only if they are match, should the more
+ expensive string comparison be used to confirm the search.
+
+ The search should take constant time regardless the size of --ipset and
+ --server rules.
+
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 dated June, 1991, or
+ (at your option) version 3 dated 29 June, 2007.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "dnsmasq.h"
+
+#define OPEN_ADDRESSING_MAXJUMP 7 /* no reason, just like 7 */
+#define OPEN_ADDRESSING_DEFAULT_SLOT 4
+#define FNV1_32A_INIT ((uint32_t)0x811c9dc5)
+#define max(A, B) ((A) > (B) ? (A) : (B))
+
+static char buf[MAXDNAME];
+
+/* prototypes */
+static struct dict_node *lookup_dictnode (struct dict_node *node, char *label);
+static void add_dicttree (struct dict_node *node, struct dict_node *sub);
+static void upsize_dicttree (struct dict_node *np);
+
+/* hash function 1 for double hashing
+ * 32 bit Fowler/Noll/Vo hash */
+static inline uint32_t fnv_32_hash (char *str)
+{
+ uint32_t hval = FNV1_32A_INIT;
+ unsigned char *s = (unsigned char *) str;
+
+ while (*s)
+ {
+ hval ^= (uint32_t) * s++;
+ hval +=
+ (hval << 1) + (hval << 4) + (hval << 7) + (hval << 8) + (hval << 24);
+ }
+
+ return hval;
+}
+
+/* hash function 2 for double hashing
+ * the modified Bernstein hash, return an odd number */
+static inline unsigned int bernstein_odd (char *key)
+{
+ unsigned char *s = (unsigned char *) key;
+ unsigned int h = 0;
+
+ while (*s)
+ h = 33 * h ^ *s++;
+
+ return h % 2 ? h : h + 1;
+}
+
+/* convert domain to lower cases, remove leading blank, leading and trailing
+ * dot, string end with \0 */
+static inline void memcpy_lower (void *dst, void *src, int len)
+{
+ char *d = (char *) dst;
+ char *s = (char *) src;
+ int i;
+
+ /* skip leading dot and blank */
+ for ( ; *s != '\0' && (*s == '.' || *s == '\t' || *s == ' '); s++ );
+
+ for (i = 0; i < len; i++, d++, s++)
+ {
+ if (*s >= 'A' && *s <= 'Z')
+ *d = *s + 'a' - 'A';
+ else
+ *d = *s;
+ }
+
+ if (*--d == '.')
+ *d = '\0';
+ else
+ *++d = '\0';
+}
+
+struct dict_node * init_sub_dictnode (struct dict_node *node)
+{
+ unsigned n;
+
+ if (node->sub != NULL)
+ return node;
+
+ node->sub_slots = OPEN_ADDRESSING_DEFAULT_SLOT;
+ node->sub_loadmax = node->sub_slots * 3 / 4; // loading factor 0.75
+ node->sub = safe_malloc (node->sub_slots * sizeof (struct dict_node *));
+ for (n = 0; n < node->sub_slots; n++)
+ node->sub[n] = NULL;
+
+ return node;
+}
+
+/* allocate and initialize a new node */
+struct dict_node * new_dictnode (char *label, int label_len)
+{
+ struct dict_node *node;
+
+ node = safe_malloc (sizeof (struct dict_node));
+ if (node == NULL)
+ return NULL;
+
+ if (label == NULL || label_len == 0)
+ {
+ node->h1 = 0;
+ node->h2 = 0;
+ node->label = NULL;
+ }
+ else
+ {
+ node->label = strdup (label);
+ node->h1 = fnv_32_hash (label);
+ node->h2 = bernstein_odd (label);
+ }
+
+ node->sub_count = 0;
+ node->sub_slots = 0;
+ node->sub_loadmax = 0;
+ node->sub_maxjump = 0;
+ node->sub = NULL;
+ node->obj = NULL;
+
+ return node;
+}
+
+/* double the slots of dns node, it calls with add_dicttree each other
+ * the table size starts with 2^2, so that the new size remains 2^x, the
+ * double hash used is choosed to work with 2^n slots and perform well */
+static void upsize_dicttree (struct dict_node *np)
+{
+ struct dict_node **oldnodes;
+ unsigned i, oldsize;
+
+ oldsize = np->sub_slots;
+ oldnodes = np->sub;
+ np->sub_slots = oldsize * 2;
+ np->sub_loadmax = np->sub_slots * 3 / 4;
+ np->sub_count = 0;
+ np->sub_maxjump = 0;
+ np->sub = safe_malloc (np->sub_slots * sizeof (struct dict_node *));
+ for (i = 0; i < np->sub_slots; i++)
+ np->sub[i] = NULL;
+
+ for (i = 0; i < oldsize; i++)
+ {
+ if (oldnodes[i] != NULL)
+ {
+ add_dicttree (np, oldnodes[i]);
+ }
+ }
+
+ free (oldnodes);
+}
+
+/* add a sub-node, upsize if needed, calls with upsize_dicttree each other */
+static void add_dicttree (struct dict_node *node, struct dict_node *sub)
+{
+ int n;
+ uint32_t dh, idx;
+
+ if (node->sub == NULL)
+ init_sub_dictnode (node);
+
+ n = 0;
+ dh = sub->h1;
+ while (1)
+ {
+ idx = dh % node->sub_slots;
+ if (node->sub[idx] == NULL)
+ {
+ node->sub[idx] = sub;
+ node->sub_count += 1;
+ break;
+ }
+ else
+ {
+ dh += sub->h2;
+ n++;
+ }
+ }
+
+ node->sub_maxjump = max (n, node->sub_maxjump);
+ /* If it takes a lots of jumps to find an empty slot, or the used slots
+ * close to loading max, upsize the table */
+ if (node->sub_maxjump > OPEN_ADDRESSING_MAXJUMP ||
+ node->sub_count > node->sub_loadmax)
+ {
+ upsize_dicttree (node);
+ }
+
+ return;
+}
+
+/* add a new subnode to node, or update the attr of the subnode with same
+ * label
+ * return the subnode */
+struct dict_node *add_or_lookup_dictnode (struct dict_node *node, char *label)
+{
+ struct dict_node *np;
+
+ if ((np = lookup_dictnode (node, label)) == NULL)
+ {
+ if (node->sub == NULL)
+ {
+ init_sub_dictnode (node);
+ }
+ np = new_dictnode (label, strlen (label));
+ add_dicttree (node, np);
+ }
+
+ return np;
+}
+
+/* lookup the label in node's sub, return pointer if found, NULL if not */
+static struct dict_node *lookup_dictnode (struct dict_node *node, char *label)
+{
+ uint32_t h1, h2, dh, idx;
+ struct dict_node *np;
+
+ /* this domain doesn't have sub-domains */
+ if (node->sub == NULL)
+ {
+ return NULL;
+ }
+
+ dh = h1 = fnv_32_hash (label);
+ h2 = bernstein_odd (label);
+ idx = dh % node->sub_slots;
+ while ((np = node->sub[idx]) != NULL)
+ {
+ if (np->h1 == h1 && np->h2 == h2)
+ if (strcmp (np->label, label) == 0)
+ {
+ return np;
+ }
+
+ dh += h2;
+ idx = dh % node->sub_slots;
+ }
+
+ return NULL;
+}
+
+/* look up the whole domain pattern by step over DNS name hierarchy top down.
+ * for example, if the pattern is cn.debian.org, the lookup will start with
+ * org, then debian, then cn */
+struct dict_node * match_domain(struct dict_node *root, char *domain)
+{
+ char *labels[MAXLABELS];
+ int i, label_num;
+ int len = strlen (domain);
+ struct dict_node *node, *res;
+
+ if (root == NULL)
+ return NULL;
+
+ memset(buf, 0, sizeof(buf));
+ memcpy_lower (buf, domain, len);
+ /*
+ remove the trailing dot, make the last label top domain
+ if (buf[len - 1] == '.')
+ buf[len - 1] = '\0';
+ else
+ buf[len] = '\0';
+ */
+
+ for (i = 0; i < MAXLABELS; i++)
+ labels[i] = NULL;
+
+ label_num = 0;
+ labels[label_num++] = &buf[0];
+
+ /* split domain name into labels */
+ for (i = 0; buf[i] != '\0'; i++)
+ {
+ if (buf[i] == '.')
+ {
+ buf[i] = '\0';
+ labels[label_num++] = &buf[i + 1];
+ }
+ }
+
+ node = root;
+ res = NULL;
+ for (i = label_num - 1; i >= 0; i--)
+ {
+ node = lookup_dictnode (node, labels[i]);
+
+ /* match longest pattern, e.g. for pattern debian.org and cn.debian.org,
+ * domain name ftp.cn.debian.org will match pattern cn.debian.org */
+ if (node == NULL)
+ break;
+
+ if (node->obj != NULL)
+ res = node;
+ }
+
+ if (res == NULL)
+ return NULL;
+
+ return res;
+}
+
+/* look up the whole domain pattern by step over DNS name hierarchy top down.
+ * for example, if the pattern is cn.debian.org, the lookup will start with
+ * org, then debian, then cn */
+struct dict_node * lookup_domain (struct dict_node *root, char *domain)
+{
+ char *labels[MAXLABELS];
+ int i, label_num;
+ int len = strlen (domain);
+ struct dict_node *node;
+
+ memset(buf, 0, sizeof(buf));
+ memcpy_lower (buf, domain, len);
+
+ for (i = 0; i < MAXLABELS; i++)
+ labels[i] = NULL;
+
+ label_num = 0;
+
+ labels[label_num++] = &buf[0];
+
+ for (i = 0; buf[i] != '\0'; i++)
+ {
+ if (buf[i] == '.')
+ {
+ buf[i] = '\0';
+ labels[label_num++] = &buf[i + 1];
+ }
+ }
+
+ node = root;
+ for (i = label_num - 1; i >= 0 && node != NULL; i--)
+ {
+ node = lookup_dictnode (node, labels[i]);
+ }
+
+ return i == -1 ? node : NULL;
+}
+
+/* add a domain pattern in the form of debian.org to root
+ * return the node with lowest hierarchy */
+struct dict_node *add_or_lookup_domain (struct dict_node *root, char *domain)
+{
+ char *labels[MAXLABELS];
+ int i, label_num;
+ int len = strlen (domain);
+ struct dict_node *node;
+
+ memset(buf, 0, sizeof(buf));
+ memcpy_lower (buf, domain, len);
+
+ for (i = 0; i < MAXLABELS; i++)
+ labels[i] = NULL;
+
+ label_num = 0;
+ labels[label_num++] = &buf[0];
+
+ for (i = 0; buf[i] != '\0'; i++)
+ {
+ if (buf[i] == '.')
+ {
+ buf[i] = '\0';
+ labels[label_num++] = &buf[i + 1];
+ }
+ }
+
+ node = root;
+ for (i = label_num - 1; i >= 0; i--)
+ {
+ node = add_or_lookup_dictnode (node, labels[i]);
+ }
+
+ return node;
+}
+
+/* free node and all sub-nodes recursively. Unused. */
+void free_dicttree (struct dict_node *node)
+{
+ struct dict_node *np;
+ unsigned i;
+
+ if (node->sub_count > 0)
+ {
+ for (i = 0; i < node->sub_slots; i++)
+ {
+ np = node->sub[i];
+ if (np != NULL)
+ {
+ if (np->label != NULL)
+ free (np->label);
+
+ if (np->obj != NULL)
+ free (np->obj);
+
+ free_dicttree (np);
+ }
+ }
+ free (node->sub);
+ }
+
+ free (node);
+}
+
+/* only compare addr, source_addr, interface, and flags */
+static inline int is_same_server(struct server *s1, struct server *s2)
+{
+ if (memcmp(&s1->addr, &s2->addr, sizeof(union mysockaddr)) != 0)
+ return 0;
+
+ if (strncmp(s1->interface, s2->interface, IF_NAMESIZE + 1) != 0)
+ return 0;
+
+ if (s1->flags != s2->flags)
+ return 0;
+
+ return 1;
+}
+
+/* duplicate a struct server, but only copy addr, source_addr, interfaces, and
+ * flags
+ * return the allocated pointer */
+static inline struct server *serverdup(struct server *src)
+{
+ struct server *dst;
+
+ dst = safe_malloc(sizeof(struct server));
+ memcpy(dst, src, sizeof(struct server));
+
+ return dst;
+}
+
+/* lookup server by compare addr, source_addr, interface, and flags with
+ * servers in daemon->servers link list. If no match found, then insert a new
+ * server
+ *
+ * Return the lookup result or the newly created server*/
+struct server *lookup_or_install_new_server(struct server *serv)
+{
+ struct server *res;
+
+ res = NULL;
+ for (res = daemon->servers; res != NULL; res = res->next) {
+ if (is_same_server(res, serv))
+ break;
+ }
+
+ if (res == NULL) {
+ res = serverdup(serv);
+ res->next = daemon->servers;
+ daemon->servers = res;
+ }
+
+ return res;
+}
+
+/* print the daemon->dh_special_domains tree recursively
+ *
+ * do we really need it? */
+void print_server_special_domains (struct dict_node *node,
+ char *parents[], int current_level)
+{
+ struct dict_node *np;
+ struct special_domain *obj;
+ char buf[MAXDNAME];
+ char ip_buf[16];
+ int j, level;
+ int port = 0;
+ uint32_t i;
+
+ level = current_level + 1;
+ if (node->label != NULL)
+ {
+ parents[level] = node->label;
+ if (node->obj != NULL)
+ {
+ obj = (struct special_domain *) node->obj;
+ if (obj->domain_flags & SERV_HAS_DOMAIN)
+ {
+ memset (buf, 0, MAXDNAME);
+ for (j = level; j > 1; j--)
+ {
+ strcat (buf, parents[j]);
+ strcat (buf, ".");
+ }
+ buf[strlen (buf) - 1] = '\0';
+ port = prettyprint_addr (&obj->server->addr, ip_buf);
+ my_syslog(LOG_INFO, _("using nameserver %s#%d for domain %s"),
+ ip_buf, port, buf);
+ }
+ }
+ }
+
+ if (node->sub_count > 0)
+ {
+ for (i = 0; i < node->sub_slots; i++)
+ if ((np = node->sub[i]) != NULL)
+ print_server_special_domains (np, parents, level);
+ }
+}
diff --git a/src/dnsmasq.c b/src/dnsmasq.c
index e903a24..f816f42 100644
--- a/src/dnsmasq.c
+++ b/src/dnsmasq.c
@@ -250,7 +250,7 @@ int main (int argc, char **argv)
#endif
#ifdef HAVE_IPSET
- if (daemon->ipsets)
+ if (daemon->dh_ipsets)
ipset_init();
#endif
diff --git a/src/dnsmasq.h b/src/dnsmasq.h
index 89e758b..bec55a2 100644
--- a/src/dnsmasq.h
+++ b/src/dnsmasq.h
@@ -516,6 +516,36 @@ struct ipsets {
struct ipsets *next;
};
+/* a dictionary node for open addressing hash table
+ * it has a key, "label", and a value "obj" as container, there are several
+ * other values for maintaining the hash table and lookup
+ *
+ * For ipsets match, only INSERT and LOOKUP operation needed
+ */
+struct dict_node {
+ char *label; /* key */
+ void *obj; /* the value, can point to anything */
+ uint32_t h1; /* from hash function 1, fnv_32_hash */
+ uint32_t h2; /* from hash function 2, bernstein_odd */
+ unsigned sub_slots; /* size of hash table sub */
+ int sub_count; /* items stored in sub */
+ int sub_loadmax; /* max items stored before upsize sub */
+ int sub_maxjump; /* max jumps for insertion, upsize when reach */
+ struct dict_node **sub;
+};
+
+struct special_domain {
+ struct server *server;
+ union mysockaddr addr;
+ int domain_flags;
+};
+
+
+struct ipsets_names {
+ char **sets; /* ipsets names end with NULL ptr */
+ int count;
+};
+
struct irec {
union mysockaddr addr;
struct in_addr netmask; /* only valid for IPv4 */
@@ -945,6 +975,11 @@ extern struct daemon {
struct bogus_addr *bogus_addr, *ignore_addr;
struct server *servers;
struct ipsets *ipsets;
+
+ struct dict_node *dh_ipsets;
+ struct dict_node *dh_ipset_names;
+ struct dict_node *dh_special_domains;
+
int log_fac; /* log facility */
char *log_file; /* optional log file */
int max_logs; /* queue limit */
@@ -1370,6 +1405,18 @@ void ipset_init(void);
int add_to_ipset(const char *setname, const struct all_addr *ipaddr, int flags, int remove);
#endif
+/* dict.c */
+#define MAXLABELS 128
+struct dict_node *new_dictnode (char *label, int len);
+struct dict_node *add_or_lookup_dictnode (struct dict_node *node, char *label);
+struct dict_node *lookup_domain(struct dict_node *root, char *domain);
+struct dict_node *match_domain(struct dict_node *root, char *domain);
+struct dict_node *add_or_lookup_domain (struct dict_node *root, char *domain);
+struct server *lookup_or_install_new_server(struct server *serv);
+void free_dicttree (struct dict_node *node);
+void print_server_special_domains(struct dict_node *node,
+ char *parents[], int current_level);
+
/* helper.c */
#if defined(HAVE_SCRIPT)
int create_helper(int event_fd, int err_fd, uid_t uid, gid_t gid, long max_fd);
--
1.7.10.4
More information about the Dnsmasq-discuss
mailing list