[Dnsmasq-discuss] [PATCH 1/4] fast lookup --server and --ipset

weichen302 at icloud.com weichen302 at icloud.com
Sun Feb 8 10:11:19 GMT 2015


It has became a national pastime by abusing dnsmasq with thousands of --server
and --ipset rules in some part of the planet.

This patch makes dnsmasq handle it with ease.


add a new module for domain name lookup


---
 Makefile      |    2 +-
 src/dict.c    |  539 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/dnsmasq.c |    2 +-
 src/dnsmasq.h |   47 +++++
 4 files changed, 588 insertions(+), 2 deletions(-)
 create mode 100644 src/dict.c

diff --git a/Makefile b/Makefile
index 2910320..d4a42ca 100644
--- a/Makefile
+++ b/Makefile
@@ -73,7 +73,7 @@ objs = cache.o rfc1035.o util.o option.o forward.o network.o \
        dnsmasq.o dhcp.o lease.o rfc2131.o netlink.o dbus.o bpf.o \
        helper.o tftp.o log.o conntrack.o dhcp6.o rfc3315.o \
        dhcp-common.o outpacket.o radv.o slaac.o auth.o ipset.o \
-       domain.o dnssec.o blockdata.o tables.o loop.o inotify.o
+       domain.o dnssec.o blockdata.o tables.o loop.o inotify.o dict.o
 
 hdrs = dnsmasq.h config.h dhcp-protocol.h dhcp6-protocol.h \
        dns-protocol.h radv-protocol.h ip6addr.h
diff --git a/src/dict.c b/src/dict.c
new file mode 100644
index 0000000..cad3463
--- /dev/null
+++ b/src/dict.c
@@ -0,0 +1,539 @@
+/*  dict.c is Copyright (c) 2015 Chen Wei <weichen302 at gmail.com>
+
+    Use cascade of open addressing hash tables to store config options that
+    involve domain names.
+
+                       root
+                        |
+             +---------------------+
+            com                   org
+             |                     |
+    +------------------+     +-------------+
+    yahoo google twitter   debian       freebsd
+      |      |               |             |
+     www    mail          +---------+     www
+                          cn jp uk us
+                          |
+                         ftp
+
+    The lookup steps over domain name hierarchy top-down. All labels are stored
+    in open addressing hash tables. Sub-level labels that belong to different
+    parent nodes are stored separately. e.g. yahoo, google, and twitter are in
+    one hash table, while debian and freebsd are in another.
+
+    The hash table size is power of 2, two hash functions are used to compute
+    hash bucket. For locating a particular label from hash table, two hash
+    values are compared first, only if they are match, should the more
+    expensive string comparison be used to confirm the search.
+
+    The search should take constant time regardless the size of --ipset and
+    --server rules.
+
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; version 2 dated June, 1991, or
+    (at your option) version 3 dated 29 June, 2007.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "dnsmasq.h"
+
+#define OPEN_ADDRESSING_MAXJUMP 7                /* no reason, just like 7 */
+#define OPEN_ADDRESSING_DEFAULT_SLOT 4
+#define FNV1_32A_INIT ((uint32_t)0x811c9dc5)
+#define max(A, B) ((A) > (B) ? (A) : (B))
+
+static char buf[MAXDNAME];
+
+/* prototypes */
+static struct dict_node *lookup_dictnode (struct dict_node *node, char *label);
+static void add_dicttree (struct dict_node *node, struct dict_node *sub);
+static void upsize_dicttree (struct dict_node *np);
+
+/* hash function 1 for double hashing
+ * 32 bit Fowler/Noll/Vo hash */
+static inline uint32_t fnv_32_hash (char *str)
+{
+  uint32_t hval = FNV1_32A_INIT;
+  unsigned char *s = (unsigned char *) str;
+
+  while (*s)
+    {
+      hval ^= (uint32_t) * s++;
+      hval +=
+        (hval << 1) + (hval << 4) + (hval << 7) + (hval << 8) + (hval << 24);
+    }
+
+  return hval;
+}
+
+/* hash function 2 for double hashing
+ * the modified Bernstein hash, return an odd number */
+static inline unsigned int bernstein_odd (char *key)
+{
+  unsigned char *s = (unsigned char *) key;
+  unsigned int h = 0;
+
+  while (*s)
+    h = 33 * h ^ *s++;
+
+  return h % 2 ? h : h + 1;
+}
+
+/* convert domain to lower cases, remove leading blank, leading and trailing
+ * dot, string end with \0 */
+static inline void memcpy_lower (void *dst, void *src, int len)
+{
+  char *d = (char *) dst;
+  char *s = (char *) src;
+  int i;
+
+  /* skip leading dot and blank */
+  for ( ; *s != '\0' && (*s == '.' || *s == '\t' || *s == ' '); s++ );
+
+  for (i = 0; i < len; i++, d++, s++)
+    {
+      if (*s >= 'A' && *s <= 'Z')
+        *d = *s + 'a' - 'A';
+      else
+        *d = *s;
+    }
+
+  if (*--d == '.')
+      *d = '\0';
+  else
+      *++d = '\0';
+}
+
+struct dict_node * init_sub_dictnode (struct dict_node *node)
+{
+  unsigned n;
+
+  if (node->sub != NULL)
+    return node;
+
+  node->sub_slots = OPEN_ADDRESSING_DEFAULT_SLOT;
+  node->sub_loadmax = node->sub_slots * 3 / 4;        // loading factor 0.75
+  node->sub = safe_malloc (node->sub_slots * sizeof (struct dict_node *));
+  for (n = 0; n < node->sub_slots; n++)
+    node->sub[n] = NULL;
+
+  return node;
+}
+
+/* allocate and initialize a new node */
+struct dict_node * new_dictnode (char *label, int label_len)
+{
+  struct dict_node *node;
+
+  node = safe_malloc (sizeof (struct dict_node));
+  if (node == NULL)
+    return NULL;
+
+  if (label == NULL || label_len == 0)
+    {
+      node->h1 = 0;
+      node->h2 = 0;
+      node->label = NULL;
+    }
+  else
+    {
+      node->label = strdup (label);
+      node->h1 = fnv_32_hash (label);
+      node->h2 = bernstein_odd (label);
+    }
+
+  node->sub_count = 0;
+  node->sub_slots = 0;
+  node->sub_loadmax = 0;
+  node->sub_maxjump = 0;
+  node->sub = NULL;
+  node->obj = NULL;
+
+  return node;
+}
+
+/* double the slots of dns node, it calls with add_dicttree each other
+ * the table size starts with 2^2, so that the new size remains 2^x, the
+ * double hash used is choosed to work with 2^n slots and perform well */
+static void upsize_dicttree (struct dict_node *np)
+{
+  struct dict_node **oldnodes;
+  unsigned i, oldsize;
+
+  oldsize = np->sub_slots;
+  oldnodes = np->sub;
+  np->sub_slots = oldsize * 2;
+  np->sub_loadmax = np->sub_slots * 3 / 4;
+  np->sub_count = 0;
+  np->sub_maxjump = 0;
+  np->sub = safe_malloc (np->sub_slots * sizeof (struct dict_node *));
+  for (i = 0; i < np->sub_slots; i++)
+    np->sub[i] = NULL;
+
+  for (i = 0; i < oldsize; i++)
+    {
+      if (oldnodes[i] != NULL)
+        {
+          add_dicttree (np, oldnodes[i]);
+        }
+    }
+
+  free (oldnodes);
+}
+
+/* add a sub-node, upsize if needed, calls with upsize_dicttree each other */
+static void add_dicttree (struct dict_node *node, struct dict_node *sub)
+{
+  int n;
+  uint32_t dh, idx;
+
+  if (node->sub == NULL)
+    init_sub_dictnode (node);
+
+  n = 0;
+  dh = sub->h1;
+  while (1)
+    {
+      idx = dh % node->sub_slots;
+      if (node->sub[idx] == NULL)
+        {
+          node->sub[idx] = sub;
+          node->sub_count += 1;
+          break;
+        }
+      else
+        {
+          dh += sub->h2;
+          n++;
+        }
+    }
+
+  node->sub_maxjump = max (n, node->sub_maxjump);
+  /* If it takes a lots of jumps to find an empty slot, or the used slots
+   * close to loading max, upsize the table */
+  if (node->sub_maxjump > OPEN_ADDRESSING_MAXJUMP ||
+      node->sub_count > node->sub_loadmax)
+    {
+      upsize_dicttree (node);
+    }
+
+  return;
+}
+
+/* add a new subnode to node, or update the attr of the subnode with same
+ * label
+ * return the subnode */
+struct dict_node *add_or_lookup_dictnode (struct dict_node *node, char *label)
+{
+  struct dict_node *np;
+
+  if ((np = lookup_dictnode (node, label)) == NULL)
+    {
+      if (node->sub == NULL)
+        {
+          init_sub_dictnode (node);
+        }
+      np = new_dictnode (label, strlen (label));
+      add_dicttree (node, np);
+    }
+
+  return np;
+}
+
+/* lookup the label in node's sub, return pointer if found, NULL if not */
+static struct dict_node *lookup_dictnode (struct dict_node *node, char *label)
+{
+  uint32_t h1, h2, dh, idx;
+  struct dict_node *np;
+
+  /* this domain doesn't have sub-domains */
+  if (node->sub == NULL)
+    {
+      return NULL;
+    }
+
+  dh = h1 = fnv_32_hash (label);
+  h2 = bernstein_odd (label);
+  idx = dh % node->sub_slots;
+  while ((np = node->sub[idx]) != NULL)
+    {
+      if (np->h1 == h1 && np->h2 == h2)
+        if (strcmp (np->label, label) == 0)
+          {
+            return np;
+          }
+
+      dh += h2;
+      idx = dh % node->sub_slots;
+    }
+
+  return NULL;
+}
+
+/* look up the whole domain pattern by step over DNS name hierarchy top down.
+ * for example, if the pattern is cn.debian.org, the lookup will start with
+ * org, then debian, then cn */
+struct dict_node * match_domain(struct dict_node *root, char *domain)
+{
+  char *labels[MAXLABELS];
+  int i, label_num;
+  int len = strlen (domain);
+  struct dict_node *node, *res;
+
+  if (root == NULL)
+      return NULL;
+
+  memset(buf, 0, sizeof(buf));
+  memcpy_lower (buf, domain, len);
+  /*
+  remove the trailing dot, make the last label top domain
+  if (buf[len - 1] == '.')
+    buf[len - 1] = '\0';
+  else
+    buf[len] = '\0';
+  */
+
+  for (i = 0; i < MAXLABELS; i++)
+    labels[i] = NULL;
+
+  label_num = 0;
+  labels[label_num++] = &buf[0];
+
+  /* split domain name into labels */
+  for (i = 0; buf[i] != '\0'; i++)
+    {
+      if (buf[i] == '.')
+        {
+          buf[i] = '\0';
+          labels[label_num++] = &buf[i + 1];
+        }
+    }
+
+  node = root;
+  res = NULL;
+  for (i = label_num - 1; i >= 0; i--)
+    {
+      node = lookup_dictnode (node, labels[i]);
+
+      /* match longest pattern, e.g. for pattern debian.org and cn.debian.org,
+       * domain name ftp.cn.debian.org will match pattern cn.debian.org */
+      if (node == NULL)
+          break;
+
+      if (node->obj != NULL)
+          res = node;
+    }
+
+  if (res == NULL)
+    return NULL;
+
+  return res;
+}
+
+/* look up the whole domain pattern by step over DNS name hierarchy top down.
+ * for example, if the pattern is cn.debian.org, the lookup will start with
+ * org, then debian, then cn */
+struct dict_node * lookup_domain (struct dict_node *root, char *domain)
+{
+  char *labels[MAXLABELS];
+  int i, label_num;
+  int len = strlen (domain);
+  struct dict_node *node;
+
+  memset(buf, 0, sizeof(buf));
+  memcpy_lower (buf, domain, len);
+
+  for (i = 0; i < MAXLABELS; i++)
+    labels[i] = NULL;
+
+  label_num = 0;
+
+  labels[label_num++] = &buf[0];
+
+  for (i = 0; buf[i] != '\0'; i++)
+    {
+      if (buf[i] == '.')
+        {
+          buf[i] = '\0';
+          labels[label_num++] = &buf[i + 1];
+        }
+    }
+
+  node = root;
+  for (i = label_num - 1; i >= 0 && node != NULL; i--)
+    {
+      node = lookup_dictnode (node, labels[i]);
+    }
+
+  return i == -1 ? node : NULL;
+}
+
+/* add a domain pattern in the form of debian.org to root
+ * return the node with lowest hierarchy */
+struct dict_node *add_or_lookup_domain (struct dict_node *root, char *domain)
+{
+  char *labels[MAXLABELS];
+  int i, label_num;
+  int len = strlen (domain);
+  struct dict_node *node;
+
+  memset(buf, 0, sizeof(buf));
+  memcpy_lower (buf, domain, len);
+
+  for (i = 0; i < MAXLABELS; i++)
+    labels[i] = NULL;
+
+  label_num = 0;
+  labels[label_num++] = &buf[0];
+
+  for (i = 0; buf[i] != '\0'; i++)
+    {
+      if (buf[i] == '.')
+        {
+          buf[i] = '\0';
+          labels[label_num++] = &buf[i + 1];
+        }
+    }
+
+  node = root;
+  for (i = label_num - 1; i >= 0; i--)
+    {
+      node = add_or_lookup_dictnode (node, labels[i]);
+    }
+
+  return node;
+}
+
+/* free node and all sub-nodes recursively. Unused. */
+void free_dicttree (struct dict_node *node)
+{
+  struct dict_node *np;
+  unsigned i;
+
+  if (node->sub_count > 0)
+    {
+      for (i = 0; i < node->sub_slots; i++)
+        {
+          np = node->sub[i];
+          if (np != NULL)
+            {
+              if (np->label != NULL)
+                free (np->label);
+
+              if (np->obj != NULL)
+                free (np->obj);
+
+              free_dicttree (np);
+            }
+        }
+      free (node->sub);
+    }
+
+  free (node);
+}
+
+/* only compare addr, source_addr, interface, and flags */
+static inline int is_same_server(struct server *s1, struct server *s2)
+{
+    if (memcmp(&s1->addr, &s2->addr, sizeof(union mysockaddr)) != 0)
+        return 0;
+
+    if (strncmp(s1->interface, s2->interface, IF_NAMESIZE + 1) != 0)
+        return 0;
+
+    if (s1->flags != s2->flags)
+        return 0;
+
+    return 1;
+}
+
+/* duplicate a struct server, but only copy addr, source_addr, interfaces, and
+ * flags
+ * return the allocated pointer */
+static inline struct server *serverdup(struct server *src)
+{
+    struct server *dst;
+
+    dst = safe_malloc(sizeof(struct server));
+    memcpy(dst, src, sizeof(struct server));
+
+    return dst;
+}
+
+/* lookup server by compare addr, source_addr, interface, and flags with
+ * servers in daemon->servers link list. If no match found, then insert a new
+ * server
+ *
+ * Return the lookup result or the newly created server*/
+struct server *lookup_or_install_new_server(struct server *serv)
+{
+    struct server *res;
+
+    res = NULL;
+    for (res = daemon->servers; res != NULL; res = res->next) {
+        if (is_same_server(res, serv))
+            break;
+    }
+
+    if (res == NULL) {
+        res = serverdup(serv);
+        res->next = daemon->servers;
+        daemon->servers = res;
+    }
+
+    return res;
+}
+
+/* print the daemon->dh_special_domains tree recursively
+ *
+ * do we really need it?  */
+void print_server_special_domains (struct dict_node *node,
+                                   char *parents[], int current_level)
+{
+  struct dict_node *np;
+  struct special_domain *obj;
+  char buf[MAXDNAME];
+  char ip_buf[16];
+  int j, level;
+  int port = 0;
+  uint32_t i;
+
+  level = current_level + 1;
+  if (node->label != NULL)
+    {
+      parents[level] = node->label;
+      if (node->obj != NULL)
+        {
+          obj = (struct special_domain *) node->obj;
+          if (obj->domain_flags & SERV_HAS_DOMAIN)
+            {
+              memset (buf, 0, MAXDNAME);
+              for (j = level; j > 1; j--)
+                {
+                  strcat (buf, parents[j]);
+                  strcat (buf, ".");
+                }
+              buf[strlen (buf) - 1] = '\0';
+              port = prettyprint_addr (&obj->server->addr, ip_buf);
+              my_syslog(LOG_INFO, _("using nameserver %s#%d for domain %s"),
+                                    ip_buf, port, buf);
+            }
+        }
+    }
+
+  if (node->sub_count > 0)
+    {
+      for (i = 0; i < node->sub_slots; i++)
+        if ((np = node->sub[i]) != NULL)
+          print_server_special_domains (np, parents, level);
+    }
+}
diff --git a/src/dnsmasq.c b/src/dnsmasq.c
index e903a24..f816f42 100644
--- a/src/dnsmasq.c
+++ b/src/dnsmasq.c
@@ -250,7 +250,7 @@ int main (int argc, char **argv)
 #endif
 
 #ifdef HAVE_IPSET
-  if (daemon->ipsets)
+  if (daemon->dh_ipsets)
     ipset_init();
 #endif
 
diff --git a/src/dnsmasq.h b/src/dnsmasq.h
index 89e758b..bec55a2 100644
--- a/src/dnsmasq.h
+++ b/src/dnsmasq.h
@@ -516,6 +516,36 @@ struct ipsets {
   struct ipsets *next;
 };
 
+/* a dictionary node for open addressing hash table
+ * it has a key, "label", and a value "obj" as container, there are several
+ * other values for maintaining the hash table and lookup
+ *
+ * For ipsets match, only INSERT and LOOKUP operation needed
+ */
+struct dict_node {
+  char *label;          /* key */
+  void *obj;            /* the value, can point to anything */
+  uint32_t h1;          /* from hash function 1, fnv_32_hash */
+  uint32_t h2;          /* from hash function 2, bernstein_odd */
+  unsigned sub_slots;   /* size of hash table sub */
+  int sub_count;        /* items stored in sub */
+  int sub_loadmax;      /* max items stored before upsize sub */
+  int sub_maxjump;      /* max jumps for insertion, upsize when reach */
+  struct dict_node **sub;
+};
+
+struct special_domain {
+  struct server *server;
+  union mysockaddr addr;
+  int domain_flags;
+};
+
+
+struct ipsets_names {
+  char **sets;          /* ipsets names end with NULL ptr */
+  int    count;
+};
+
 struct irec {
   union mysockaddr addr;
   struct in_addr netmask; /* only valid for IPv4 */
@@ -945,6 +975,11 @@ extern struct daemon {
   struct bogus_addr *bogus_addr, *ignore_addr;
   struct server *servers;
   struct ipsets *ipsets;
+
+  struct dict_node *dh_ipsets;
+  struct dict_node *dh_ipset_names;
+  struct dict_node *dh_special_domains;
+
   int log_fac; /* log facility */
   char *log_file; /* optional log file */
   int max_logs;  /* queue limit */
@@ -1370,6 +1405,18 @@ void ipset_init(void);
 int add_to_ipset(const char *setname, const struct all_addr *ipaddr, int flags, int remove);
 #endif
 
+/* dict.c */
+#define MAXLABELS 128
+struct dict_node *new_dictnode (char *label, int len);
+struct dict_node *add_or_lookup_dictnode (struct dict_node *node, char *label);
+struct dict_node *lookup_domain(struct dict_node *root, char *domain);
+struct dict_node *match_domain(struct dict_node *root, char *domain);
+struct dict_node *add_or_lookup_domain (struct dict_node *root, char *domain);
+struct server *lookup_or_install_new_server(struct server *serv);
+void free_dicttree (struct dict_node *node);
+void print_server_special_domains(struct dict_node *node,
+                                  char *parents[], int current_level);
+
 /* helper.c */
 #if defined(HAVE_SCRIPT)
 int create_helper(int event_fd, int err_fd, uid_t uid, gid_t gid, long max_fd);
-- 
1.7.10.4




More information about the Dnsmasq-discuss mailing list