/* dns_answer.c - Receive and process incoming dns queries.
Copyright (C) 2000, 2001 Thomas Moestl
Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2009, 2010, 2011 Paul A. Rombouts
This file is part of the pdnsd package.
pdnsd is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
pdnsd is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with pdnsd; see the file COPYING. If not, see
<http://www.gnu.org/licenses/>.
*/
/*
* STANDARD CONFORMITY
*
* There are several standard conformity issues noted in the comments.
* Some additional comments:
*
* I always set RA but I ignore RD largely (in everything but CNAME recursion),
* not because it is not supported, but because I _always_ do a recursive
* resolve in order to be able to cache the results.
*/
#include
#include "ipvers.h"
#include
#include
#include
#ifdef HAVE_SYS_POLL_H
#include
#endif
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "thread.h"
#include "list.h"
#include "dns.h"
#include "dns_answer.h"
#include "dns_query.h"
#include "helpers.h"
#include "cache.h"
#include "error.h"
#include "debug.h"
/*
 * Error-message throttling, to prevent spewing the log files.
 * Each category of message has its own maximum (*_MAX_ERRS) and its own
 * counter (da_*_errs).
 * Races on the counters do not really matter here, so they are not
 * protected by a lock.
 */
#define TCP_MAX_ERRS 10
#define UDP_MAX_ERRS 10
#define MEM_MAX_ERRS 10
#define THRD_MAX_ERRS 10
#define MISC_MAX_ERRS 10
static volatile unsigned long da_tcp_errs=0;
static volatile unsigned long da_udp_errs=0;
static volatile unsigned long da_mem_errs=0;
static volatile unsigned long da_thrd_errs=0;
/* The counter for miscellaneous errors only exists in debug builds. */
#if DEBUG>0
static volatile unsigned long da_misc_errs=0;
#endif
static volatile int procs=0; /* active query processes */
static volatile int qprocs=0; /* queued query processes */
/* NOTE(review): presumably the number of dropped queries and of spawned query threads,
   and a counter used to hand out thread ids; their use is not visible in this part
   of the file -- confirm. */
static volatile unsigned long dropped=0,spawned=0;
static volatile unsigned thrid_cnt=0;
/* NOTE(review): presumably guards the process counters above -- confirm against the users. */
static pthread_mutex_t proc_lock = PTHREAD_MUTEX_INITIALIZER;
/* Only defined when the build enables SOCKET_LOCKING. */
#ifdef SOCKET_LOCKING
static pthread_mutex_t s_lock = PTHREAD_MUTEX_INITIALIZER;
#endif
/*
 * Storage for the packet information that accompanies a message.
 * The union has one member per enabled IP version. For IPv4 the member is a
 * struct in_pktinfo when the target is Linux and a plain struct in_addr otherwise.
 */
typedef union {
#ifdef ENABLE_IPV4
# if (TARGET==TARGET_LINUX)
struct in_pktinfo pi4;
# else
struct in_addr ai4;
# endif
#endif
#ifdef ENABLE_IPV6
struct in6_pktinfo pi6;
#endif
} pkt_info_t;
/*
 * Buffer for a message received over UDP, together with the bookkeeping that
 * belongs to it. The message bytes are stored inline after the fixed part.
 * NOTE(review): the field descriptions below are inferred from the names and
 * types only; the code that uses them is outside this part of the file -- confirm.
 */
typedef struct {
/* Socket address (one member per enabled IP version); presumably the address of the peer. */
union {
#ifdef ENABLE_IPV4
struct sockaddr_in sin4;
#endif
#ifdef ENABLE_IPV6
struct sockaddr_in6 sin6;
#endif
} addr;
pkt_info_t pi; /* packet information belonging to the message */
int sock; /* presumably the socket the message was received on */
int proto; /* presumably the protocol (IP version) in use */
size_t len; /* presumably the number of valid bytes in buf */
unsigned char buf[0]; /* Actual size determined by global.udpbufsize */
} udp_buf_t;
/* Initial allocation size for an answer message.
   ALLOCINITIALSIZE should be at least sizeof(dns_msg_t) = 2+12 */
#define ALLOCINITIALSIZE 256
/* When an answer has to grow, the new allocation size is rounded up with
   (newsz+ALLOCCHUNKSIZEMASK)&(~ALLOCCHUNKSIZEMASK).
   This mask corresponds to a chunk size of 128 bytes. */
#define ALLOCCHUNKSIZEMASK ((size_t)0x7f)
/*
 * One entry of the question section of a query.
 * The query name is stored inline after the fixed fields; its length is
 * obtained with rhnlen() where needed.
 */
typedef struct {
unsigned short qtype; /* query type */
unsigned short qclass; /* query class */
unsigned char query[0]; /* query name, variable length */
} dns_queryel_t;
/* Section of a dns message a record is added to (the section argument of add_rr).
   The value selects which count field in the header is incremented. */
#define S_ANSWER 1
#define S_AUTHORITY 2
#define S_ADDITIONAL 3
/*
 * Entry of the list of records that have already been added to a response
 * (see sva_add). The record name is stored inline in nm and is immediately
 * followed by dlen bytes of record data.
 */
typedef struct {
unsigned short tp,dlen; /* record type and length of the record data */
unsigned char nm[0]; /* record name, variable length */
/* unsigned char data[0]; */
} sva_t;
/*
 * Remember that the record (rhn, tp, data) has been added to the response,
 * so that it is not added a second time.
 * A new entry holding the type, the data length, the name and a copy of the
 * dlen bytes of data is appended to the list *sva.
 * If sva is NULL nothing is recorded and the call succeeds.
 * Returns 1 on success, 0 if the list could not be grown.
 */
static int sva_add(dlist *sva, const unsigned char *rhn, unsigned short tp, unsigned short dlen, void* data)
{
    size_t namelen;
    sva_t *entry;
    unsigned char *tail;

    if (!sva)
        return 1;

    namelen = rhnlen(rhn);
    *sva = dlist_grow(*sva, sizeof(sva_t) + namelen + dlen);
    if (!*sva)
        return 0;

    entry = dlist_last(*sva);
    entry->tp = tp;
    entry->dlen = dlen;
    /* The name comes first, the record data follows it directly. */
    tail = mempcpy(entry->nm, rhn, namelen);
    memcpy(tail, data, dlen);
    return 1;
}
/* ans_ttl computes the ttl value to return to the client.
   For an rr set flagged CF_LOCAL this is simply the stored ttl: local
   entries never "age".
   For all other sets it is the stored ttl minus the time that has passed
   between the timestamp of the set and queryts. Both the elapsed time and
   the result are clamped so that they are never negative.
*/
inline static time_t ans_ttl(rr_set_t *rrset, time_t queryts)
{
    time_t remaining = rrset->ttl;
    time_t age;

    if (rrset->flags & CF_LOCAL)
        return remaining;

    age = queryts - rrset->ts;
    if (age < 0)
        age = 0;
    remaining -= age;
    return (remaining < 0) ? 0 : remaining;
}
/* follow_cname_chain looks for a CNAME rr set in the cache entry c.
   If the set exists and contains at least one record, the data of the first
   record (the name the CNAME refers to, in length-byte string notation) is
   copied to name, which must be at least DNSNAMEBUFSIZE bytes long.
   Returns 1 if a name was copied, otherwise 0 (name is left untouched).
*/
inline static int follow_cname_chain(dns_cent_t *c, unsigned char *name)
{
    rr_set_t *cname_set = getrrset_CNAME(c);
    rr_bucket_t *first;

    if (!cname_set)
        return 0;
    first = cname_set->rrs;
    if (!first)
        return 0;

    PDNSD_ASSERT(first->rdlen <= DNSNAMEBUFSIZE, "follow_cname_chain: record too long");
    memcpy(name, first->data, first->rdlen);
    return 1;
}
/*
 * Append one resource record to the dns message in *ans.
 *
 * ans, allocsz: the message and its allocated size. The message is grown (in multiples
 *               of the allocation chunk size) when the record does not fit; both are
 *               updated in that case.
 * sz:           on entry the length of the message counted from the start of the dns
 *               header, on return the new length.
 * rrn:          owner name of the record.
 * type, ttl:    rr type and ttl value to put into the record header. The class is always C_IN.
 * dlen, data:   length and contents of the record data as stored in the cache. For the
 *               types handled explicitly below, the domain names inside the data are passed
 *               through compress_name(); data of all other types is copied verbatim.
 * section:      the section (S_ANSWER, S_AUTHORITY or S_ADDITIONAL) in which the record
 *               belongs logically; it selects the count field that is incremented. Note that
 *               you still have to add the rrs in the right order (answer rrs first, then
 *               authority and last additional).
 * udp:          if not NULL, *udp is the maximum message length. A record for the additional
 *               section that would make the message longer than that is dropped again
 *               (*sz is restored); this still counts as success.
 * cb:           the buffer used for message compression. *cb should be NULL when you call
 *               compress_name or add_to_response the first time. It gets filled with a pointer
 *               to compression information that can be reused in subsequent calls.
 *
 * Returns 1 on success, 0 if compress_name() fails or memory could not be allocated.
 */
static int add_rr(dns_msg_t **ans, size_t *sz, size_t *allocsz,
unsigned char *rrn, unsigned short type, uint32_t ttl,
unsigned int dlen, void *data, char section, unsigned *udp, dlist *cb)
{
size_t osz= *sz; /* original size, needed to drop the record again at the end */
unsigned int ilen,blen,rdlen;
unsigned char *rrht;
{
unsigned int nlen;
unsigned char nbuf[DNSNAMEBUFSIZE];
/* The owner name is compressed into a scratch buffer first, because its
   length must be known before the message can be grown. */
if (!(nlen=compress_name(rrn,nbuf,*sz,cb)))
return 0;
/* This buffer is usually over-allocated due to compression.
Never mind, just a few bytes, and the buffer is freed soon. */
{
size_t newsz= dnsmsghdroffset + *sz + nlen + sizeof_rr_hdr_t + dlen;
if(newsz > *allocsz) {
/* Need to allocate more space.
To avoid frequent reallocs, we allocate
a multiple of a certain chunk size. */
size_t newallocsz= (newsz+ALLOCCHUNKSIZEMASK)&(~ALLOCCHUNKSIZEMASK);
dns_msg_t *newans=(dns_msg_t *)pdnsd_realloc(*ans,newallocsz);
if (!newans)
return 0;
*ans=newans;
*allocsz=newallocsz;
}
}
memcpy((unsigned char *)(&(*ans)->hdr)+ *sz, nbuf, nlen);
*sz += nlen;
}
/* the rr header will be filled in later. Just reserve some space for it. */
rrht= ((unsigned char *)(&(*ans)->hdr)) + *sz;
*sz += sizeof_rr_hdr_t;
/* Write the record data. rdlen is set to the length actually written, which may be
   smaller than dlen because of name compression; the space reserved above is based
   on dlen, hence the assertions that the data did not get longer. */
switch (type) {
/* The data is a single domain name. */
case T_CNAME:
case T_MB:
case T_MD:
case T_MF:
case T_MG:
case T_MR:
case T_NS:
case T_PTR:
if (!(rdlen=compress_name(((unsigned char *)data), ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
return 0;
PDNSD_ASSERT(rdlen <= dlen, "T_CNAME/T_MB/...: got longer");
*sz+=rdlen;
break;
#if IS_CACHED_MINFO || IS_CACHED_RP
/* The data consists of two domain names. */
#if IS_CACHED_MINFO
case T_MINFO:
#endif
#if IS_CACHED_RP
case T_RP:
#endif
if (!(rdlen=compress_name(((unsigned char *)data), ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
return 0;
*sz+=rdlen;
/* ilen is the offset of the second name in the uncompressed data. */
ilen=rhnlen((unsigned char *)data);
PDNSD_ASSERT(rdlen <= ilen, "T_MINFO/T_RP: got longer");
if (!(blen=compress_name(((unsigned char *)data)+ilen, ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
return 0;
rdlen+=blen;
PDNSD_ASSERT(rdlen <= dlen, "T_MINFO/T_RP: got longer");
*sz+=blen;
break;
#endif
/* The data consists of 2 bytes that are copied verbatim, followed by a domain name. */
case T_MX:
#if IS_CACHED_AFSDB
case T_AFSDB:
#endif
#if IS_CACHED_RT
case T_RT:
#endif
#if IS_CACHED_KX
case T_KX:
#endif
PDNSD_ASSERT(dlen > 2, "T_MX/T_AFSDB/...: rr botch");
memcpy(((unsigned char *)(&(*ans)->hdr))+(*sz),(unsigned char *)data,2);
*sz+=2;
if (!(blen=compress_name(((unsigned char *)data)+2, ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
return 0;
rdlen=2+blen;
PDNSD_ASSERT(rdlen <= dlen, "T_MX/T_AFSDB/...: got longer");
*sz+=blen;
break;
/* The data consists of two domain names followed by 20 bytes that are copied verbatim. */
case T_SOA:
if (!(rdlen=compress_name(((unsigned char *)data), ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
return 0;
*sz+=rdlen;
ilen=rhnlen((unsigned char *)data);
PDNSD_ASSERT(rdlen <= ilen, "T_SOA: got longer");
if (!(blen=compress_name(((unsigned char *)data)+ilen, ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
return 0;
rdlen+=blen;
*sz+=blen;
/* ilen now becomes the offset of the fixed-size part in the uncompressed data. */
ilen+=rhnlen(((unsigned char *)data)+ilen);
PDNSD_ASSERT(rdlen <= ilen, "T_SOA: got longer");
memcpy(((unsigned char *)(&(*ans)->hdr))+(*sz),((unsigned char *)data)+ilen,20);
rdlen+=20;
PDNSD_ASSERT(rdlen <= dlen, "T_SOA: rr botch");
*sz+=20;
break;
#if IS_CACHED_PX
/* The data consists of 2 bytes that are copied verbatim, followed by two domain names. */
case T_PX:
PDNSD_ASSERT(dlen > 2, "T_PX: rr botch");
memcpy(((unsigned char *)(&(*ans)->hdr))+(*sz),(unsigned char *)data,2);
*sz+=2;
ilen=2;
if (!(blen=compress_name(((unsigned char *)data)+ilen, ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
return 0;
rdlen=2+blen;
*sz+=blen;
ilen+=rhnlen(((unsigned char *)data)+ilen);
PDNSD_ASSERT(rdlen <= ilen, "T_PX: got longer");
if (!(blen=compress_name(((unsigned char *)data)+ilen, ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
return 0;
rdlen+=blen;
PDNSD_ASSERT(rdlen <= dlen, "T_PX: got longer");
*sz+=blen;
break;
#endif
#if IS_CACHED_SRV
/* The data consists of 6 bytes that are copied verbatim, followed by a domain name. */
case T_SRV:
PDNSD_ASSERT(dlen > 6, "T_SRV: rr botch");
memcpy(((unsigned char *)(&(*ans)->hdr))+(*sz),(unsigned char *)data,6);
*sz+=6;
if (!(blen=compress_name(((unsigned char *)data)+6, ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
return 0;
rdlen=6+blen;
PDNSD_ASSERT(rdlen <= dlen, "T_SRV: got longer");
*sz+=blen;
break;
#endif
#if IS_CACHED_NXT
/* The data consists of a domain name, followed by whatever remains (copied verbatim). */
case T_NXT:
if (!(blen=compress_name(((unsigned char *)data), ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
return 0;
rdlen=blen;
*sz+=blen;
ilen=rhnlen((unsigned char *)data);
PDNSD_ASSERT(rdlen <= ilen, "T_NXT: got longer");
PDNSD_ASSERT(dlen >= ilen, "T_NXT: rr botch");
if (dlen > ilen) {
unsigned int wlen = dlen - ilen;
memcpy(((unsigned char *)(&(*ans)->hdr))+(*sz),((unsigned char *)data)+ilen,wlen);
*sz+=wlen;
rdlen+=wlen;
}
break;
#endif
#if IS_CACHED_NAPTR
/* The data consists of 4 bytes and three strings, each preceded by a length byte
   (all copied verbatim), followed by a domain name. */
case T_NAPTR:
PDNSD_ASSERT(dlen > 4, "T_NAPTR: rr botch");
ilen=4;
{
int j;
/* Skip over the three length-prefixed strings to find the offset of the name. */
for (j=0;j<3;j++) {
ilen += ((unsigned)*(((unsigned char *)data)+ilen)) + 1;
PDNSD_ASSERT(dlen > ilen, "T_NAPTR: rr botch 2");
}
}
memcpy(((unsigned char *)(&(*ans)->hdr))+(*sz),((unsigned char *)data),ilen);
(*sz)+=ilen;
if (!(blen=compress_name(((unsigned char *)data)+ilen, ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
return 0;
rdlen=ilen+blen;
PDNSD_ASSERT(rdlen <= dlen, "T_NAPTR: got longer");
*sz+=blen;
break;
#endif
/* All other types: the data is copied verbatim. */
default:
memcpy(((unsigned char *)(&(*ans)->hdr))+(*sz),((unsigned char *)data),dlen);
rdlen=dlen;
*sz+=dlen;
}
/* NOTE(review): when the record is dropped here, anything compress_name() may have
   recorded in *cb for this record is not undone -- confirm that this is harmless. */
if (udp && *sz>*udp && section==S_ADDITIONAL) /* only add the record if we do not increase the length over 512 */
*sz=osz; /* (or possibly more if the request used EDNS) in additionals for udp answer. */
else {
/* The record stays: fill in the header for which space was reserved above
   and count the record in the header field of its section. */
PUTINT16(type,rrht);
PUTINT16(C_IN,rrht);
PUTINT32(ttl,rrht);
PUTINT16(rdlen,rrht);
switch (section) {
case S_ANSWER:
(*ans)->hdr.ancount=htons(ntohs((*ans)->hdr.ancount)+1);
break;
case S_AUTHORITY:
(*ans)->hdr.nscount=htons(ntohs((*ans)->hdr.nscount)+1);
break;
case S_ADDITIONAL:
(*ans)->hdr.arcount=htons(ntohs((*ans)->hdr.arcount)+1);
break;
}
}
return 1;
}
/* Append an OPT pseudo RR containing EDNS info to the message in *ans.
   Can only be added to the additional section!
   The message is grown if necessary, in which case *ans and *allocsz are
   updated. *sz is the message length counted from the start of the dns
   header and is increased by the (fixed) size of the pseudo RR.
   The record has an empty name and empty RDATA; udpsize goes into the class
   field, and the upper bits of rcode (rcode>>4), ednsver and Zflags make up
   the ttl field.
   Returns 1 on success, 0 if memory could not be allocated.
*/
int add_opt_pseudo_rr(dns_msg_t **ans, size_t *sz, size_t *allocsz,
                      unsigned short udpsize, unsigned short rcode,
                      unsigned short ednsver, unsigned short Zflags)
{
    const size_t needed = dnsmsghdroffset + *sz + sizeof_opt_pseudo_rr;
    unsigned char *wp;
    unsigned short arcnt;

    if (needed > *allocsz) {
        /* Not enough room: grow to the next multiple of the chunk size
           to avoid frequent reallocs. */
        size_t rounded = (needed + ALLOCCHUNKSIZEMASK) & (~ALLOCCHUNKSIZEMASK);
        dns_msg_t *grown = (dns_msg_t *)pdnsd_realloc(*ans, rounded);
        if (!grown)
            return 0;
        *allocsz = rounded;
        *ans = grown;
    }

    wp = (unsigned char *)(&(*ans)->hdr) + *sz;
    *wp++ = 0;              /* empty name */
    PUTINT16(T_OPT, wp);    /* type field */
    PUTINT16(udpsize, wp);  /* class field */
    /* The 4 byte ttl field: extended rcode, version, flags. */
    *wp++ = rcode >> 4;
    *wp++ = ednsver;
    PUTINT16(Zflags, wp);
    PUTINT16(0, wp);        /* rdlen field; the RDATA is empty */

    *sz += sizeof_opt_pseudo_rr;

    /* One more record in the additional section. */
    arcnt = ntohs((*ans)->hdr.arcount);
    (*ans)->hdr.arcount = htons(arcnt + 1);
    return 1;
}
/* Remove the last entry in the additional section,
   assuming it is an OPT pseudo RR of fixed size (i.e. one with an empty
   name and empty RDATA, as written by add_opt_pseudo_rr).
   ans is the message, sz its length counted from the start of the dns header.
   Returns the new message size if successful, or
   zero if an inconsistency is detected. In the latter case the message
   is left unchanged.
*/
size_t remove_opt_pseudo_rr(dns_msg_t *ans, size_t sz)
{
    uint16_t acnt = ntohs(ans->hdr.arcount), type, rdlen;
    unsigned char *ptr;

    /* First do some sanity checks: there must be at least one additional
       record and the message must be long enough to contain the pseudo RR. */
    if (!(acnt > 0 && sz >= sizeof(dns_hdr_t) + sizeof_opt_pseudo_rr))
        return 0;

    sz -= sizeof_opt_pseudo_rr;
    ptr = ((unsigned char *)(&ans->hdr)) + sz;
    if (*ptr++)
        return 0; /* Name must be empty. */
    GETINT16(type, ptr);
    if (type != T_OPT)
        return 0; /* RR type must be OPT. */

    /* The rdlen field occupies the last two bytes of the fixed-size record.
       It must be zero: an OPT RR that carries RDATA is longer than the fixed
       size, so the bytes examined above would not really be its name and type. */
    ptr = ((unsigned char *)(&ans->hdr)) + sz + sizeof_opt_pseudo_rr - 2;
    GETINT16(rdlen, ptr);
    if (rdlen != 0)
        return 0; /* RDATA must be empty. */

    /* Decrement arcount field in dns header. */
    ans->hdr.arcount = htons(acnt - 1);
    return sz;
}
/*
 * Entry of the list (ar) of records that are to be added to the authority
 * and/or additional sections once the answer section is complete (see add_ar).
 * tnm holds tsz bytes of record data; for entries of type T_NS or T_SOA it is
 * immediately followed by the name of the domain the record is for.
 */
typedef struct rre_s {
unsigned short tp;
unsigned short tsz; /* Size of tnm field */
uint32_t ttl; /* ttl of the record in the answer (if tp==T_NS or T_SOA) */
unsigned char tnm[0]; /* Name for the domain a record refers to */
/* unsigned char nm[0]; */ /* Name of the domain the record is for (if tp==T_NS or T_SOA) */
} rr_ext_t;
/* types for the tp field */
/* #define RRETP_NS T_NS */ /* For name server: add to authority, add address to additional. */
/* #define RRETP_SOA T_SOA */ /* For SOA record: add to authority. */
#define RRETP_ADD 0 /* For other records: add the address of buf to additional */
/*
 * Append an entry to the list *ar of records that are to be added to the
 * authority and/or additional sections later on.
 * tp is the entry type (T_NS, T_SOA or RRETP_ADD), tnm/tsz the data to store
 * in the tnm field and ttl the ttl to remember.
 * nm is only used if tp is T_NS or T_SOA: in that case the name is stored
 * directly after the tnm data.
 * Returns 1 on success, 0 if the list could not be grown.
 */
static int add_ar(dlist *ar,unsigned short tp, unsigned short tsz,void *tnm,unsigned char *nm, uint32_t ttl)
{
    const int with_owner = (tp == T_NS || tp == T_SOA);
    size_t owner_len = with_owner ? rhnlen(nm) : 0;
    size_t entry_size = sizeof(rr_ext_t) + tsz + owner_len;
    rr_ext_t *entry;
    unsigned char *after_target;

    *ar = dlist_grow(*ar, entry_size);
    if (!*ar)
        return 0;

    entry = dlist_last(*ar);
    entry->tp = tp;
    entry->tsz = tsz;
    entry->ttl = ttl;

    after_target = mempcpy(entry->tnm, tnm, tsz);
    if (with_owner)
        memcpy(after_target, nm, owner_len);
    return 1;
}
/* Pick a random record from the list starting at rrb.
   Returns rrb itself (i.e. NULL) if the list is empty. */
inline static rr_bucket_t *randrr(rr_bucket_t *rrb)
{
    unsigned total = 0, skip;
    rr_bucket_t *it;

    /* Count the records first; this is needed to give each record
       an equal chance of being selected. */
    for (it = rrb; it; it = it->next)
        ++total;
    if (total == 0)
        return rrb;

    /* random() is used on purpose instead of the pdnsd random functions:
       those might use /dev/urandom if the user is paranoid, and no good
       PRNG is needed here. */
    skip = random() % total;
    for (; skip > 0; --skip)
        rrb = rrb->next;
    return rrb;
}
/* Number of entries in the two parallel tables below;
   T_SRV is only included if SRV records are cached. */
#if IS_CACHED_SRV
#define AR_NUM 6
#else
#define AR_NUM 5
#endif
/* Record types whose data contains a server name
   (used by add_rrset when marking records for additional address records). */
static const int ar_recs[AR_NUM]={T_NS, T_MD, T_MF, T_MB, T_MX
#if IS_CACHED_SRV
,T_SRV
#endif
};
/* offsets from record data start to server name;
   entry i belongs to the record type ar_recs[i] */
static const int ar_offs[AR_NUM]={0,0,0,0,2
#if IS_CACHED_SRV
,6
#endif
};
/* This adds the rrset of type tp from the cache entry cached to the answer section
 * of the message in *ans, using rrn as the name of the records.
 * If global.rnd_recs is set, the first element added is chosen at random; all rrs after
 * the randomized one appear in order, starting from that one and wrapping over if needed.
 * Records of type T_NS, T_A and T_AAAA are also recorded in sva, so that they will not be
 * added a second time as additional records.
 * ans, sz, allocsz, udp and cb are passed on to add_rr (see there).
 * Nothing is added if the cache entry has no records of type tp.
 * Returns 1 on success, 0 on failure. */
static int add_rrset(dns_msg_t **ans, size_t *sz, size_t *allocsz,
unsigned char *rrn, unsigned tp, time_t queryts,
dns_cent_t *cached, unsigned *udp, dlist *cb, dlist *sva, dlist *ar)
{
rr_set_t *crrset=getrrset(cached,tp);
if (crrset && crrset->rrs) {
rr_bucket_t *b;
rr_bucket_t *first=NULL; /* Initialized to inhibit compiler warning */
int i;
short rnd_recs=global.rnd_recs;
b=crrset->rrs;
if (rnd_recs) b=first=randrr(crrset->rrs);
while (b) {
if (!add_rr(ans, sz, allocsz, rrn, tp, ans_ttl(crrset,queryts),
b->rdlen, b->data, S_ANSWER, udp, cb))
return 0;
if (tp==T_NS || tp==T_A || tp==T_AAAA) {
/* mark it as added */
if (!sva_add(sva,rrn,tp,b->rdlen,b->data))
return 0;
}
/* Mark for additional address records. XXX: this should be a more effective algorithm; at least the list is small */
/* NOTE(review): the next statement appears to have lost text (apparently everything
   between a '<' and a later '>'); restore it from a pristine copy of the file. */
for (i=0;irdlen-ar_offs[i],((unsigned char *)(b->data))+ar_offs[i],
ucharp "", 0))
return 0;
break;
}
}
b=b->next;
if (rnd_recs) {
/* In randomized mode the walk started somewhere in the middle of the list:
   continue at the head and stop when the starting record is reached again. */
if(!b) b=crrset->rrs; /* wraparound */
if(b==first) break;
}
}
}
return 1;
}
/*
 * Add the fitting elements of the cache entry cached to the message in ans, where ans
 * is grown to fit, sz is the size of the packet and is modified to be the new size.
 * rrn is the name used for the records added and qtype the type asked for.
 * cb is the buffer used for message compression. *cb should be NULL if you call add_to_response
 * the first time. It gets filled with a pointer to compression information that can be
 * reused in subsequent calls to add_to_response.
 * allocsz, queryts, udp, sva and ar are passed on to add_rrset (see there).
 * Returns 1 on success, 0 on failure. Queries of type QT_AXFR and QT_IXFR always fail.
 */
static int add_to_response(dns_msg_t **ans, size_t *sz, size_t *allocsz,
unsigned char *rrn, unsigned qtype, time_t queryts,
dns_cent_t *cached, unsigned *udp, dlist *cb, dlist *sva, dlist *ar)
{
/* First of all, unless we have records of qtype, add cnames.
Well, actually, there should be at max one cname. */
if (qtype!=T_CNAME && qtype!=QT_ALL && !(qtype>=T_MIN && qtype<=T_MAX && have_rr(cached,qtype)))
if (!add_rrset(ans, sz, allocsz, rrn, T_CNAME, queryts, cached, udp, cb, sva, ar))
return 0;
/* We need no switch for qclass, since we already have filtered packets we cannot understand */
if (qtype==QT_AXFR || qtype==QT_IXFR) {
/* I do not know what to do in this case. Since we do not maintain zones (and since we are
no master server, so it is not our task), I just return an error message. If anyone
knows how to do this better, please notify me.
Anyway, this feature is rarely used in client communication, and there is no need for
other name servers to ask pdnsd. Btw: many bind servers reject an ?XFR query for security
reasons. */
return 0;
} else if (qtype==QT_MAILB) {
/* A QT_MAILB query is answered with the MB, MG and MR records. */
if (!add_rrset(ans, sz, allocsz, rrn, T_MB, queryts, cached, udp, cb, sva, ar))
return 0;
if (!add_rrset(ans, sz, allocsz, rrn, T_MG, queryts, cached, udp, cb, sva, ar))
return 0;
if (!add_rrset(ans, sz, allocsz, rrn, T_MR, queryts, cached, udp, cb, sva, ar))
return 0;
} else if (qtype==QT_MAILA) {
/* A QT_MAILA query is answered with the MD and MF records. */
if (!add_rrset(ans, sz, allocsz, rrn, T_MD, queryts, cached, udp, cb, sva, ar))
return 0;
if (!add_rrset(ans, sz, allocsz, rrn, T_MF, queryts, cached, udp, cb, sva, ar))
return 0;
} else if (qtype==QT_ALL) {
int i, n= NRRITERLIST(cached);
const unsigned short *iterlist= RRITERLIST(cached);
/* NOTE(review): the next statement appears to have lost text (apparently everything
   between a '<' and a later '>'); restore it from a pristine copy of the file. */
for (i=0; i=T_MIN && qtype<=T_MAX) {
if (!add_rrset(ans, sz, allocsz, rrn, qtype, queryts, cached, udp, cb, sva, ar))
return 0;
} else /* Shouldn't get here. */
return 0;
#if 0
if (!ntohs((*ans)->hdr.ancount)) {
/* Add a SOA if we have one and no other records are present in the answer.
* This is to aid caches so that they have a ttl. */
if (!add_rrset(ans, sz, allocsz, rrn, T_SOA , queryts, cached, udp, cb, sva, ar))
return 0;
}
#endif
return 1;
}
/*
 * Add a record to the authority or additional section, unless an identical
 * record has been added to the response before.
 *
 * rhn, tp, dlen and data identify the record: it is considered a duplicate if
 * the list *sva contains an entry with the same type, the same name (compared
 * with rhnicmp) and identical data. After the record has been handed to add_rr
 * it is recorded in *sva itself.
 * sva may be NULL, in which case no duplicate checking and no recording takes
 * place (this matches the behaviour of sva_add).
 * ans, rlen, allocsz, ttl, sect, udp and cb are passed on to add_rr (see there).
 * Returns 1 on success (including the case that the record was a duplicate),
 * 0 on failure.
 */
static int add_additional_rr(dns_msg_t **ans, size_t *rlen, size_t *allocsz,
                             unsigned char *rhn, unsigned tp, time_t ttl,
                             unsigned dlen, void *data, int sect, unsigned *udp, dlist *cb, dlist *sva)
{
    /* Check if already added; no double additionals */
    if (sva) {
        sva_t *st;
        for (st = dlist_first(*sva); st; st = dlist_next(st)) {
            if (st->tp == tp && rhnicmp(st->nm, rhn) && st->dlen == dlen &&
                (memcmp(skiprhn(st->nm), data, dlen) == 0))
            {
                return 1;
            }
        }
    }

    /* add_rr silently drops a record for the additional section if udp!=NULL
       and the message would become longer than *udp; it still returns success
       in that case, so the record is marked as added below all the same. */
    if (!add_rr(ans, rlen, allocsz, rhn, tp, ttl, dlen, data, sect, udp, cb))
        return 0;

    /* mark it as added */
    if (!sva_add(sva, rhn, tp, dlen, data))
        return 0;
    return 1;
}
/*
 * Add all the records in the bucket list rrb as additionals, using
 * add_additional_rr for each of them (so duplicates are skipped).
 * If global.rnd_recs is set, the walk starts at a randomly chosen record and
 * wraps around at the end of the list until the starting record is reached again.
 * Returns 1 on success, 0 as soon as adding a record fails.
 */
static int add_additional_rrs(dns_msg_t **ans, size_t *rlen, size_t *allocsz,
                              unsigned char *rhn, unsigned tp, time_t ttl,
                              rr_bucket_t *rrb, int sect, unsigned *udp, dlist *cb, dlist *sva)
{
    const short randomize = global.rnd_recs;
    rr_bucket_t *start = randomize ? randrr(rrb) : rrb;
    rr_bucket_t *cur = start;

    while (cur) {
        if (!add_additional_rr(ans, rlen, allocsz, rhn, tp, ttl,
                               cur->rdlen, cur->data, sect, udp, cb, sva))
            return 0;

        cur = cur->next;
        if (!randomize)
            continue;
        if (!cur)
            cur = rrb;      /* wraparound */
        if (cur == start)
            break;          /* back where we started: all records done */
    }
    return 1;
}
/*
 * Add the address records of the name rhn to the additional section.
 * The name is looked up in the cache; if an entry is found, its A records
 * and (if AAAA records are cached) its AAAA records are added with
 * add_additional_rrs, using the ttl computed by ans_ttl.
 * The cache entry obtained from the lookup is freed again before returning.
 * Returns 1 on success (also if nothing was found in the cache), 0 on failure.
 */
static int add_additional_a(dns_msg_t **ans, size_t *rlen, size_t *allocsz,
                            unsigned char *rhn, time_t queryts,
                            unsigned *udp, dlist *cb, dlist *sva)
{
    dns_cent_t *ae = lookup_cache(rhn, NULL);
    rr_set_t *set;
    int ok = 1;

    if (!ae)
        return 1;

    set = getrrset_A(ae);
    if (set && set->rrs)
        ok = add_additional_rrs(ans, rlen, allocsz,
                                rhn, T_A, ans_ttl(set, queryts),
                                set->rrs, S_ADDITIONAL, udp, cb, sva) != 0;
#if IS_CACHED_AAAA
    /* Only go on with the AAAA records if the A records did not fail. */
    if (ok) {
        set = getrrset_AAAA(ae);
        if (set && set->rrs)
            ok = add_additional_rrs(ans, rlen, allocsz,
                                    rhn, T_AAAA, ans_ttl(set, queryts),
                                    set->rrs, S_ADDITIONAL, udp, cb, sva) != 0;
    }
#endif
    free_cent(ae DBG1);
    pdnsd_free(ae);
    return ok;
}
/*
 * Compose an answer message for the decoded query in ql.
 * hdr is the header of the dns request; its id and rd fields are copied into the answer.
 * *rlen is set to be the answer length, counted from the start of the dns header.
 * If ednsinfo is not NULL, an OPT pseudo-RR is added to the additional section
 * (provided it fits within the limit given by udp).
 * If udp is not NULL, *udp indicates the max length the dns response may have.
 * If rcodep is not NULL, *rcodep is set to the rcode of the answer.
 * Returns the newly allocated answer, or NULL if the answer could not be composed.
 */
static dns_msg_t *compose_answer(llist *ql, dns_hdr_t *hdr, size_t *rlen, edns_info_t *ednsinfo, unsigned *udp, int *rcodep)
{
unsigned short rcode=RC_OK, aa=1;
dlist cb=NULL; /* name compression information */
dlist sva=NULL; /* records already added (to avoid doubles) */
dlist ar=NULL; /* records to be added to the authority and additional sections */
time_t queryts=time(NULL);
dns_queryel_t *qe;
dns_msg_t *ans;
size_t allocsz= ALLOCINITIALSIZE;
dns_cent_t *cached;
ans=(dns_msg_t *)pdnsd_malloc(allocsz);
if (!ans)
goto return_ans;
ans->hdr.id=hdr->id;
ans->hdr.qr=QR_RESP;
ans->hdr.opcode=OP_QUERY;
ans->hdr.aa=0; /* set at the very end if every cache entry used was local */
ans->hdr.tc=0; /* If tc is needed, it is set when the response is sent in udp_answer_thread. */
ans->hdr.rd=hdr->rd;
ans->hdr.ra=1;
ans->hdr.z=0;
ans->hdr.ad=0;
ans->hdr.cd=0;
ans->hdr.rcode=rcode;
ans->hdr.qdcount=0; /* this is first filled in and will be modified */
ans->hdr.ancount=0;
ans->hdr.nscount=0;
ans->hdr.arcount=0;
*rlen=sizeof(dns_hdr_t);
/* first, add the query to the response */
for (qe=llist_first(ql); qe; qe=llist_next(qe)) {
unsigned int qclen;
/* room for the name plus 4 bytes for the type and class fields */
size_t newsz= dnsmsghdroffset + *rlen + rhnlen(qe->query) + 4;
if(newsz > allocsz) {
/* Need to allocate more space.
To avoid frequent reallocs, we allocate
a multiple of a certain chunk size. */
size_t newallocsz= (newsz+ALLOCCHUNKSIZEMASK)&(~ALLOCCHUNKSIZEMASK);
dns_msg_t *newans=(dns_msg_t *)pdnsd_realloc(ans,newallocsz);
if (!newans)
goto error_ans;
ans=newans;
allocsz=newallocsz;
}
{
unsigned char *p = ((unsigned char *)&ans->hdr) + *rlen;
/* the first name occurrence will not be compressed,
but the offset needs to be stored for future compressions */
if (!(qclen=compress_name(qe->query,p,*rlen,&cb)))
goto error_ans;
p += qclen;
PUTINT16(qe->qtype,p);
PUTINT16(qe->qclass,p);
}
*rlen += qclen+4;
ans->hdr.qdcount=htons(ntohs(ans->hdr.qdcount)+1);
}
/* Barf if we get a query we cannot answer */
for (qe=llist_first(ql); qe; qe=llist_next(qe)) {
if ((PDNSD_NOT_CACHED_TYPE(qe->qtype) &&
(qe->qtype!=QT_MAILB && qe->qtype!=QT_MAILA && qe->qtype!=QT_ALL)) ||
(qe->qclass!=C_IN && qe->qclass!=QC_ALL))
{
DEBUG_MSG("Unsupported QTYPE or QCLASS.\n");
ans->hdr.rcode=rcode=RC_NOTSUPP;
goto cleanup_return;
}
}
/* second, the answer section */
for (qe=llist_first(ql); qe; qe=llist_next(qe)) {
int hops;
unsigned char qname[DNSNAMEBUFSIZE];
rhncpy(qname,qe->query);
/* look if we have a cached copy. otherwise, perform a nameserver query. Same with timeout */
hops=MAX_HOPS;
/* Each iteration resolves qname; the loop is repeated (at most MAX_HOPS more times)
   as long as the result is a cname that has to be followed. */
do {
int rc;
unsigned char c_soa=cundef;
if ((rc=dns_cached_resolve(qname,qe->qtype, &cached, MAX_HOPS,queryts,&c_soa))!=RC_OK) {
/* Resolving failed: the answer only carries the error code and, for a
   name error, possibly a SOA record in the authority section. */
ans->hdr.rcode=rcode=rc;
if(rc==RC_NAMEERR) {
if(c_soa!=cundef) {
/* Try to add a SOA record to the authority section. */
unsigned scnt=rhnsegcnt(qname);
/* NOTE(review): the next statement appears to have lost text (apparently everything
   between a '<' and a later '>'); restore it from a pristine copy of the file. */
if(c_soaflags&CF_NEGATIVE)) {
rr_bucket_t *rr;
for(rr=rrset->rrs; rr; rr=rr->next) {
if (!add_rr(&ans,rlen,&allocsz,cached->qname,T_SOA,ans_ttl(rrset,queryts),
rr->rdlen,rr->data,S_AUTHORITY,udp,&cb))
goto error_cached;
}
}
free_cent(cached DBG1);
pdnsd_free(cached);
}
}
/* Possibly add an OPT pseudo-RR to the additional section. */
if(ednsinfo) {
if(!add_opt_pseudo_rr(&ans, rlen, &allocsz, global.udpbufsize, rcode, 0,0))
goto error_ans;
}
}
goto cleanup_return;
}
/* The answer is only marked authoritative if all cache entries used are local. */
if(!(cached->flags&DF_LOCAL))
aa=0;
if (!add_to_response(&ans,rlen,&allocsz,qname,qe->qtype,queryts,cached,udp,&cb,&sva,&ar))
goto error_cached;
if (hdr->rd && qe->qtype!=T_CNAME && qe->qtype!=QT_ALL &&
!(qe->qtype>=T_MIN && qe->qtype<=T_MAX && have_rr(cached,qe->qtype)) &&
follow_cname_chain(cached,qname))
/* The rd bit is set and the response does not contain records of the requested type,
* but the response does contain a cname, so repeat the inquiry with the cname.
* add_to_response() has already added the cname to the response.
* follow_cname_chain() has copied the name the cname record refers to into qname. */
;
else {
/* maintain a list (ar) for authority records: We will add every name server that was
listed as authoritative in a reply we received (and only those) to this list.
This list will be used to fill the authority and additional sections of our own reply.
We only do this for the last record in a cname chain, to prevent answer bloat. */
rr_set_t *rrset;
int rretp=T_NS;
if((qe->qtype>=T_MIN && qe->qtype<=T_MAX && !have_rr(cached,qe->qtype)) ||
(qe->qtype==QT_MAILB && !have_rr_MB(cached) && !have_rr_MG(cached) && !have_rr_MR(cached)) ||
(qe->qtype==QT_MAILA && !have_rr_MD(cached) && !have_rr_MF(cached)))
{
/* no record of requested type in the answer section. */
rretp=T_SOA;
}
rrset=getrrset(cached,rretp);
/* A negatively cached set is of no use here. */
if(rrset && (rrset->flags&CF_NEGATIVE))
rrset=NULL;
if(!rrset) {
/* Try to find a name server higher up the hierarchy .
*/
/* From here on cached refers to the entry found by the new lookup (it may be NULL);
   the original entry is kept in prev and freed at the end of this block. */
dns_cent_t *prev=cached;
unsigned scnt=rhnsegcnt(prev->qname);
unsigned tcnt=(rretp==T_NS?prev->c_ns:prev->c_soa);
/* NOTE(review): the next statement appears to have lost text (apparently everything
   between a '<' and a later '>'); restore it from a pristine copy of the file. */
if((cached=lookup_cache((tcnt!=cundef && tcntqname,scnt-tcnt):prev->qname,NULL))) {
rrset=getrrset(cached,rretp);
if(rrset && (rrset->flags&CF_NEGATIVE))
rrset=NULL;
}
if(!rrset && (prev->flags&DF_LOCAL)) {
/* For a local entry, try the name returned by getlocalowner() instead. */
unsigned char *nm=getlocalowner(prev->qname,rretp);
if(nm) {
if(cached) {
free_cent(cached DBG1);
pdnsd_free(cached);
}
if((cached=lookup_cache(nm,NULL)))
rrset=getrrset(cached,rretp);
}
}
free_cent(prev DBG1);
pdnsd_free(prev);
}
if (rrset) {
rr_bucket_t *rr;
for (rr=rrset->rrs; rr; rr=rr->next) {
if (!add_ar(&ar, rretp, rr->rdlen,rr->data, cached->qname,
ans_ttl(rrset,queryts)))
goto error_cached;
}
}
hops=0; /* this will break the loop */
}
if(cached) {
free_cent(cached DBG1);
pdnsd_free(cached);
}
} while (--hops>=0);
}
{
rr_ext_t *rre;
/* Add the authority section */
for (rre=dlist_first(ar); rre; rre=dlist_next(rre)) {
if (rre->tp == T_NS || rre->tp == T_SOA) {
/* For these entries the name of the record is stored directly after the tnm data. */
unsigned char *nm = rre->tnm + rre->tsz;
if (!add_additional_rr(&ans, rlen, &allocsz,
nm, rre->tp, rre->ttl, rre->tsz, rre->tnm,
S_AUTHORITY, udp, &cb, &sva))
{
goto error_ans;
}
}
}
/* Add the additional section, but only if we stay within the UDP buffer limit. */
/* If a pseudo RR doesn't fit, nothing else will. */
if(!(udp && *rlen+sizeof_opt_pseudo_rr>*udp)) {
/* Possibly add an OPT pseudo-RR to the additional section. */
if(ednsinfo) {
if(!add_opt_pseudo_rr(&ans, rlen, &allocsz, global.udpbufsize, rcode, 0,0))
goto error_ans;
}
/* now add the name server addresses */
for (rre=dlist_first(ar); rre; rre=dlist_next(rre)) {
if (rre->tp == T_NS || rre->tp == RRETP_ADD) {
if (!add_additional_a(&ans, rlen, &allocsz,
rre->tnm, queryts, udp, &cb, &sva))
goto error_ans;
}
}
}
}
if (aa)
ans->hdr.aa=1;
goto cleanup_return;
/* You may not like goto's, but here we avoid lots of code duplication. */
/* The labels below fall through into each other: error_cached frees the current cache
   entry, error_ans frees the answer (NULL is returned), cleanup_return frees the
   helper lists and return_ans stores the rcode. */
error_cached:
free_cent(cached DBG1);
pdnsd_free(cached);
error_ans:
pdnsd_free(ans);
ans=NULL;
cleanup_return:
dlist_free(ar);
dlist_free(sva);
dlist_free(cb);
return_ans:
if(rcodep) *rcodep=rcode;
return ans;
}
/*
 * Decode the query (the query message is in data and rlen bytes long) into the llist qp.
 * XXX: data needs to be aligned.
 * The return value can be RC_OK or RC_TRUNC, in which case the (partially) constructed list is
 * returned in qp, or something else (RC_FORMAT or RC_SERVFAIL), in which case no list is returned.
 *
 * *ptrrem will be assigned the address just after the questions sections in the message, and *lenrem
 * the remaining message length after the questions section. These values are only meaningful if the
 * return value is RC_OK. Either pointer may be NULL if the value is not needed.
 */
static int decode_query(unsigned char *data, size_t rlen, unsigned char **ptrrem, size_t *lenrem, llist *qp)
{
int i,res=RC_OK;
dns_hdr_t *hdr=(dns_hdr_t *)data; /* aligned, so no prob. */
unsigned char *ptr=(unsigned char *)(hdr+1); /* the questions start right after the header */
/* NOTE(review): this wraps around if rlen < sizeof(dns_hdr_t); presumably the caller
   has checked the message length before -- confirm. */
size_t sz= rlen - sizeof(dns_hdr_t);
uint16_t qdcount=ntohs(hdr->qdcount);
llist_init(qp);
/* NOTE(review): the next statement appears to have lost text (apparently everything
   between a '<' and a later '>'); restore it from a pristine copy of the file. */
for (i=0; iqtype,ptr);
GETINT16(qe->qclass,ptr);
sz-=4;
memcpy(qe->query,qbuf,qlen);
}
if(ptrrem) *ptrrem=ptr;
if(lenrem) *lenrem=sz;
return res;
}
/* Scan the additional section of a query message for an OPT pseudo RR.
data and rlen are as in decode_query(). Note in particular that data needs to be aligned!
ptr should point to the beginning of the additional section, sz should contain the
length of this remaining part of the message and numrr the number of resource records in the section.
*numopt is incremented with the number of OPT RRs found (should be at most one).
Note that a return value of RC_OK means the additional section was parsed without errors, not that
an OPT pseudo RR was found! Check the value of *numopt for the latter.
The structure pointed to by ep is filled with the information of the first OPT pseudo RR found,
but only if *numopt was set to zero before the call.
Surplus OPT records and records of other types are ignored.
*/
static int decode_query_additional(unsigned char *data, size_t rlen, unsigned char *ptr, size_t sz, int numrr,
int *numopt, edns_info_t *ep)
{
int i, res;
/* NOTE(review): the next statement appears to have lost text (apparently everything
   between a '<' and a later '>'); restore it from a pristine copy of the file. */
for (i=0; i0
if(nmbuf[0]!=0) {
DEBUG_MSG("decode_query_additional: name in OPT record not empty!\n");
}
#endif
/* In an OPT pseudo RR the class field holds the udp size, and the ttl field holds
   the upper bits of the rcode, the EDNS version and the flags. */
ep->udpsize= class;
/* The extended rcode combines the byte from the ttl field with the rcode of the dns header. */
ep->rcode= ((uint16_t)ttlp[0]<<4) | ((dns_hdr_t *)data)->rcode;
ep->version= ttlp[1];
ep->do_flg= (ttlp[2]>>7)&1; /* the most significant bit of the flags */
#if DEBUG>0
if(debug_p) {
unsigned int Zflags= ((uint16_t)ttlp[2]<<8) | ttlp[3];
if(Zflags & 0x7fff) {
DEBUG_MSG("decode_query_additional: Z field contains unknown nonzero bits (%04x).\n",
Zflags);
}
if(rdlen) {
DEBUG_MSG("decode_query_additional: RDATA field in OPT record not empty!\n");
}
}
#endif
}
else {
DEBUG_MSG("decode_query_additional: ingnoring surplus OPT record.\n");
}
}
else {
DEBUG_MSG("decode_query_additional: ignoring record of type %s (%d).\n",
getrrtpname(type), type);
}
/* Skip RDATA field. */
sz -= rdlen;
ptr += rdlen;
}
return RC_OK;
}
/* Fill in the dns header *rep so that it forms an error reply
 * with all sections empty.
 * id is the query id and still in network order.
 * opcode is the opcode to fill in, rescode is the result code to report.
 * The reply is marked as a response with the ra bit set; all other
 * flag bits are cleared.
 */
static void mk_error_reply(unsigned short id, unsigned short opcode,unsigned short rescode,dns_hdr_t *rep)
{
    /* Values supplied by the caller. */
    rep->id = id;
    rep->opcode = opcode;
    rep->rcode = rescode;

    /* This is a response, and recursion is available. */
    rep->qr = QR_RESP;
    rep->ra = 1;

    /* All remaining flag bits are zero. */
    rep->aa = rep->tc = rep->rd = rep->z = rep->ad = rep->cd = 0;

    /* No records in any section. */
    rep->qdcount = rep->ancount = rep->nscount = rep->arcount = 0;
}
/*
* Analyze and answer the query in data. The answer is returned. rlen is at call the query length and at
* return the length of the answer. You have to free the answer after sending it.
*/
static dns_msg_t *process_query(unsigned char *data, size_t *rlenp, unsigned *udp, int *rcodep)
{
size_t rlen= *rlenp;
int res;
dns_hdr_t *hdr;
llist ql;
dns_msg_t *ans;
edns_info_t ednsinfo= {0}, *ednsinfop= NULL;
DEBUG_MSG("Received query (msg len=%u).\n", (unsigned int)rlen);
DEBUG_DUMP_DNS_MSG(data, rlen);
/*
* We will ignore all records that come with a query, except for the actual query records,
* and possible OPT pseudo RRs in the addtional section.
* We will send back the query in the response. We will reject all non-queries, and
* some not supported thingies.
* If anyone notices behaviour that is not in standard conformance, please notify me!
*/
hdr=(dns_hdr_t *)data;
if (rlen<2) {
DEBUG_MSG("Message too short.\n");
return NULL; /* message too short: no id provided. */
}
if (rlenqr!=QR_QUERY) {
DEBUG_MSG("The QR bit indicates this is a response, not a query.\n");
return NULL; /* RFC says: discard */
}
if (hdr->opcode!=OP_QUERY) {
DEBUG_MSG("Not a standard query (opcode=%u).\n",hdr->opcode);
res=RC_NOTSUPP;
goto error_reply;
}
#if DEBUG>0
if(debug_p) {
char flgsbuf[DNSFLAGSMAXSTRSIZE];
dnsflags2str(hdr, flgsbuf);
if(flgsbuf[0]) {
DEBUG_MSG("Flags:%s\n", flgsbuf);
}
}
#endif
if (hdr->z!=0) {
DEBUG_MSG("Malformed query (nonzero Z bit).\n");
res=RC_FORMAT;
goto error_reply;
}
if (hdr->rcode!=RC_OK) {
DEBUG_MSG("Bad rcode(%u).\n",hdr->rcode);
return NULL; /* discard (may cause error storms) */
}
if (hdr->ancount) {
DEBUG_MSG("Query has a non-empty answer section!\n");
res=RC_FORMAT;
goto error_reply;
}
if (hdr->nscount) {
DEBUG_MSG("Query has a non-empty authority section!\n");
res=RC_FORMAT;
goto error_reply;
}
#if 0
/* The following only makes sense if we completely disallow
Extension Mechanisms for DNS (RFC 2671). */
if (hdr->arcount) {
DEBUG_MSG("Query has a non-empty additional section!\n");
res=RC_FORMAT;
goto error_reply;
}
#endif
{
unsigned char *ptr;
size_t sz;
uint16_t arcount;
res=decode_query(data,rlen,&ptr,&sz,&ql);
if(res!=RC_OK) {
if(res==RC_TRUNC) {
if(!hdr->tc || llist_isempty(&ql)) {
res=RC_FORMAT;
goto free_ql_error_reply;
}
}
else
goto error_reply;
}
if ((arcount=ntohs(hdr->arcount))) {
int numoptrr= 0;
DEBUG_MSG("Query has a non-empty additional section: "
"checking for OPT pseudo-RR.\n");
if(res==RC_TRUNC) {
DEBUG_MSG("Additional section cannot be read due to truncation!\n");
res=RC_FORMAT;
goto free_ql_error_reply;
}
res=decode_query_additional(data,rlen,ptr,sz,arcount, &numoptrr, &ednsinfo);
if(!(res==RC_OK || (res==RC_TRUNC && hdr->tc))) {
res=RC_FORMAT;
goto free_ql_error_reply;
}
if(numoptrr) {
#if DEBUG>0
if(numoptrr!=1) {
DEBUG_MSG("Additional section in query contains %d OPT pseudo-RRs!\n", numoptrr);
}
#endif
if(ednsinfo.version!=0) {
DEBUG_MSG("Query contains unsupported EDNS version %d!\n", ednsinfo.version);
res=RC_BADVERS;
goto free_ql_error_reply;
}
if(ednsinfo.rcode!=0) {
DEBUG_MSG("Query contains non-zero EDNS rcode (%d)!\n", ednsinfo.rcode);
res=RC_FORMAT;
goto free_ql_error_reply;
}
DEBUG_MSG("Query contains OPT pseudosection: EDNS udp size = %u, flag DO=%u\n",
ednsinfo.udpsize, ednsinfo.do_flg);
ednsinfop = &ednsinfo;
if(udp && ednsinfo.udpsize>UDP_BUFSIZE) {
unsigned udpbufsize = global.udpbufsize;
if(udpbufsize > ednsinfo.udpsize)
udpbufsize = ednsinfo.udpsize;
*udp = udpbufsize;
}
}
}
}
#if DEBUG>0
if (debug_p) {
if(!llist_isempty(&ql)) {
dns_queryel_t *qe;
DEBUG_MSG("Questions are:\n");
for (qe=llist_first(&ql); qe; qe=llist_next(qe)) {
DEBUG_RHN_MSG("\tqc=%s (%u), qt=%s (%u), query=\"%s\"\n",
get_cname(qe->qclass),qe->qclass,get_tname(qe->qtype),qe->qtype,RHN2STR(qe->query));
}
}
else {
DEBUG_MSG("Query contains no questions.\n");
}
}
#endif
if (llist_isempty(&ql)) {
res=RC_FORMAT;
goto error_reply;
}
if (!(ans=compose_answer(&ql, hdr, rlenp, ednsinfop, udp, rcodep))) {
/* An out of memory condition or similar could cause NULL output. Send failure notification */
res=RC_SERVFAIL;
goto free_ql_error_reply;
}
llist_free(&ql);
return ans;
free_ql_error_reply:
llist_free(&ql);
error_reply:
*rlenp=sizeof(dns_hdr_t);
{
size_t allocsz = sizeof(dns_msg_t);
if(res&~0xf)
allocsz += sizeof_opt_pseudo_rr;
ans= (dns_msg_t *)pdnsd_malloc(allocsz);
if (ans) {
mk_error_reply(hdr->id,rlen>=3?hdr->opcode:OP_QUERY,res,&ans->hdr);
if(res&~0xf)
add_opt_pseudo_rr(&ans,rlenp,&allocsz,
global.udpbufsize,res,0,0);
}
else if (++da_mem_errs<=MEM_MAX_ERRS) {
log_error("Out of memory in query processing.");
}
}
if(rcodep) *rcodep= res;
return ans;
}
/*
 * Helper for the *_answer_thread exit handlers: takes proc_lock and
 * decrements both the procs and the qprocs counter by one.
 */
inline static void decrease_procs()
{
	pthread_mutex_lock(&proc_lock);
	--qprocs;
	--procs;
	pthread_mutex_unlock(&proc_lock);
}
/*
 * Cleanup handler pushed by udp_answer_thread: releases the buffer passed
 * in data with pdnsd_free() and then gives back the thread's slot in the
 * process counters.
 */
static void udp_answer_thread_cleanup(void *data)
{
	/* The query buffer is released first, ... */
	pdnsd_free(data);

	/* ... then the counters are decreased. */
	decrease_procs();
}
/*
* A thread opened to answer a query transmitted via udp. Data is a pointer to the structure udp_buf_t that
* contains the received data and various other parameters.
* After the query is answered, the thread terminates.
* udp_answer_thread_cleanup is pushed as a cleanup handler for data, so the buffer is released both on the
* normal return path (pthread_cleanup_pop(1)) and when the thread leaves through pthread_exit().
* Always returns NULL.
* XXX: data must point to a correctly aligned buffer
*/
static void *udp_answer_thread(void *data)
{
struct msghdr msg;
struct iovec v;
struct cmsghdr *cmsg;
#if defined(SRC_ADDR_DISC)
/* Room for exactly one control message carrying a pkt_info_t. */
char ctrl[CMSG_SPACE(sizeof(pkt_info_t))];
#endif
size_t rlen=((udp_buf_t *)data)->len;
/* Upper bound for the reply size; process_query may raise it through the pointer passed below. */
unsigned udpmaxrespsize = UDP_BUFSIZE;
/* XXX: process_query is assigned to this, this mallocs, so this points to aligned memory */
dns_msg_t *resp;
int rcode;
unsigned thrid;
pthread_cleanup_push(udp_answer_thread_cleanup, data);
THREAD_SIGINIT;
if (!global.strict_suid) {
if (!run_as(global.run_as)) {
pdnsd_exit();
}
}
for(;;) {
pthread_mutex_lock(&proc_lock);
/* NOTE(review): the next line looks truncated -- "if (procs0" is not valid C. The text between the
 * comparison on procs and a "#if DEBUG>0" guard (matching the #endif below) appears to have been lost,
 * presumably including the end of this loop, the release of proc_lock and the assignment of thrid.
 * Restore it from the upstream source rather than guessing. */
if (procs0
if(debug_p) {
int err;
/* Make the thread id available as thread-specific data; a failure is only logged. */
if ((err=pthread_setspecific(thrid_key, &thrid)) != 0) {
if(++da_misc_errs<=MISC_MAX_ERRS)
log_error("pthread_setspecific failed: %s",strerror(err));
/* pdnsd_exit(); */
}
}
#endif
if (!(resp=process_query(((udp_buf_t *)data)->buf,&rlen,&udpmaxrespsize,&rcode))) {
/*
* A return value of NULL is a fatal error that prohibits even the sending of an error message.
* logging is already done. Just exit the thread now.
*/
pthread_exit(NULL); /* data freed by cleanup handler */
}
pthread_cleanup_push(free, resp);
/* A reply longer than the allowed size is cut off and marked as truncated. */
if (rlen>udpmaxrespsize) {
rlen=udpmaxrespsize;
resp->hdr.tc=1; /*set truncated bit*/
}
DEBUG_MSG("Outbound msg len %li, tc=%u, rc=\"%s\"\n",(long)rlen,resp->hdr.tc,get_ename(rcode));
/* The datagram payload starts at the DNS header inside resp. */
v.iov_base=(char *)&resp->hdr;
v.iov_len=rlen;
msg.msg_iov=&v;
msg.msg_iovlen=1;
#if (TARGET!=TARGET_CYGWIN)
#if defined(SRC_ADDR_DISC)
msg.msg_control=ctrl;
msg.msg_controllen=sizeof(ctrl);
#else
msg.msg_control=NULL;
msg.msg_controllen=0;
#endif
msg.msg_flags=0; /* to avoid warning message by Valgrind */
#endif
#ifdef ENABLE_IPV4
if (run_ipv4) {
/* Reply to the address stored in the buffer. */
msg.msg_name=&((udp_buf_t *)data)->addr.sin4;
msg.msg_namelen=sizeof(struct sockaddr_in);
# if defined(SRC_ADDR_DISC)
# if (TARGET==TARGET_LINUX)
/* Use the address stored in pi4.ipi_addr as the source address of the reply. */
((udp_buf_t *)data)->pi.pi4.ipi_spec_dst=((udp_buf_t *)data)->pi.pi4.ipi_addr;
cmsg=CMSG_FIRSTHDR(&msg);
cmsg->cmsg_len=CMSG_LEN(sizeof(struct in_pktinfo));
cmsg->cmsg_level=SOL_IP;
cmsg->cmsg_type=IP_PKTINFO;
memcpy(CMSG_DATA(cmsg),&((udp_buf_t *)data)->pi.pi4,sizeof(struct in_pktinfo));
/* Shrink the control length to the one message actually filled in. */
msg.msg_controllen=CMSG_SPACE(sizeof(struct in_pktinfo));
# else
cmsg=CMSG_FIRSTHDR(&msg);
cmsg->cmsg_len=CMSG_LEN(sizeof(struct in_addr));
cmsg->cmsg_level=IPPROTO_IP;
cmsg->cmsg_type=IP_RECVDSTADDR;
memcpy(CMSG_DATA(cmsg),&((udp_buf_t *)data)->pi.ai4,sizeof(struct in_addr));
msg.msg_controllen=CMSG_SPACE(sizeof(struct in_addr));
# endif
# endif
# if DEBUG>0
{
char buf[ADDRSTR_MAXLEN];
DEBUG_MSG("Answering to: %s", inet_ntop(AF_INET,&((udp_buf_t *)data)->addr.sin4.sin_addr,buf,ADDRSTR_MAXLEN));
# if defined(SRC_ADDR_DISC)
# if (TARGET==TARGET_LINUX)
DEBUG_MSGC(", source address: %s\n", inet_ntop(AF_INET,&((udp_buf_t *)data)->pi.pi4.ipi_spec_dst,buf,ADDRSTR_MAXLEN));
# else
DEBUG_MSGC(", source address: %s\n", inet_ntop(AF_INET,&((udp_buf_t *)data)->pi.ai4,buf,ADDRSTR_MAXLEN));
# endif
# else
DEBUG_MSGC("\n");
# endif
}
# endif /* DEBUG */
}
#endif
#ifdef ENABLE_IPV6
ELSE_IPV6 {
/* IPv6 counterpart of the branch above. */
msg.msg_name=&((udp_buf_t *)data)->addr.sin6;
msg.msg_namelen=sizeof(struct sockaddr_in6);
# if defined(SRC_ADDR_DISC)
cmsg=CMSG_FIRSTHDR(&msg);
cmsg->cmsg_len=CMSG_LEN(sizeof(struct in6_pktinfo));
cmsg->cmsg_level=SOL_IPV6;
cmsg->cmsg_type=IPV6_PKTINFO;
memcpy(CMSG_DATA(cmsg),&((udp_buf_t *)data)->pi.pi6,sizeof(struct in6_pktinfo));
msg.msg_controllen=CMSG_SPACE(sizeof(struct in6_pktinfo));
# endif
# if DEBUG>0
{
char buf[ADDRSTR_MAXLEN];
DEBUG_MSG("Answering to: %s", inet_ntop(AF_INET6,&((udp_buf_t *)data)->addr.sin6.sin6_addr,buf,ADDRSTR_MAXLEN));
# if defined(SRC_ADDR_DISC)
DEBUG_MSGC(", source address: %s\n", inet_ntop(AF_INET6,&((udp_buf_t *)data)->pi.pi6.ipi6_addr,buf,ADDRSTR_MAXLEN));
# else
DEBUG_MSGC("\n");
# endif
}
# endif /* DEBUG */
}
#endif
/* Lock the socket, and clear the error flag before dropping the lock */
#ifdef SOCKET_LOCKING
pthread_mutex_lock(&s_lock);
#endif
if (sendmsg(((udp_buf_t *)data)->sock,&msg,0)<0) {
#ifdef SOCKET_LOCKING
pthread_mutex_unlock(&s_lock);
#endif
/* Send errors are logged only up to UDP_MAX_ERRS times. */
if (++da_udp_errs<=UDP_MAX_ERRS) {
log_error("Error in udp send: %s",strerror(errno));
}
} else {
int tmp;
socklen_t sl=sizeof(tmp);
/* Query SO_ERROR on the socket; the value read is discarded. */
getsockopt(((udp_buf_t *)data)->sock, SOL_SOCKET, SO_ERROR, &tmp, &sl);
#ifdef SOCKET_LOCKING
pthread_mutex_unlock(&s_lock);
#endif
}
pthread_cleanup_pop(1); /* free(resp) */
pthread_cleanup_pop(1); /* free(data) */
return NULL;
}
/*
 * Create the UDP server socket, set the options needed for destination
 * address discovery (only when SRC_ADDR_DISC is defined) and bind it to
 * the address and port taken from the global configuration.
 * Returns the socket descriptor, or -1 after logging an error.
 */
int init_udp_socket()
{
	int sock;
	int so=1;
	union {
#ifdef ENABLE_IPV4
		struct sockaddr_in sin4;
#endif
#ifdef ENABLE_IPV6
		struct sockaddr_in6 sin6;
#endif
	} sin;
	socklen_t sinl;

#ifdef ENABLE_IPV4
	if (run_ipv4) {
		sock=socket(PF_INET,SOCK_DGRAM,IPPROTO_UDP);
		if (sock==-1) {
			log_error("Could not open udp socket: %s",strerror(errno));
			return -1;
		}
		memset(&sin.sin4,0,sizeof(struct sockaddr_in));
		sin.sin4.sin_family=AF_INET;
		sin.sin4.sin_port=htons(global.port);
		sin.sin4.sin_addr=global.a.ipv4;
		SET_SOCKA_LEN4(sin.sin4);
		sinl=sizeof(struct sockaddr_in);
	}
#endif
#ifdef ENABLE_IPV6
	ELSE_IPV6 {
		sock=socket(PF_INET6,SOCK_DGRAM,IPPROTO_UDP);
		if (sock==-1) {
			log_error("Could not open udp socket: %s",strerror(errno));
			return -1;
		}
		memset(&sin.sin6,0,sizeof(struct sockaddr_in6));
		sin.sin6.sin6_family=AF_INET6;
		sin.sin6.sin6_port=htons(global.port);
		sin.sin6.sin6_flowinfo=IPV6_FLOWINFO;
		sin.sin6.sin6_addr=global.a.ipv6;
		SET_SOCKA_LEN6(sin.sin6);
		sinl=sizeof(struct sockaddr_in6);
	}
#endif

#ifdef SRC_ADDR_DISC
# if (TARGET==TARGET_LINUX )
	/* This is requested in any case, because it also applies to IPv4
	 * packets sent to ipv6 addresses. */
	if (setsockopt(sock,SOL_IP,IP_PKTINFO,&so,sizeof(so))!=0)
		goto sockopt_failed;
# else
	/* On other targets the IPv4 option is requested only when running IPv4. */
	if (run_ipv4) {
		if (setsockopt(sock,IPPROTO_IP,IP_RECVDSTADDR,&so,sizeof(so))!=0)
			goto sockopt_failed;
	}
# endif
# ifdef ENABLE_IPV6
	if (!run_ipv4) {
		if (setsockopt(sock,SOL_IPV6,IPV6_RECVPKTINFO,&so,sizeof(so))!=0)
			goto sockopt_failed;
	}
# endif
#endif

	if (bind(sock,(struct sockaddr *)&sin,sinl)!=0) {
		log_error("Could not bind to udp socket: %s",strerror(errno));
		goto close_and_fail;
	}
	return sock;

#ifdef SRC_ADDR_DISC
 sockopt_failed:
	log_error("Could not set options on udp socket: %s",strerror(errno));
#endif
 close_and_fail:
	close(sock);
	return -1;
}
/*
* Listen on the specified port for udp packets and answer them (each in a new thread to be nonblocking)
* This was changed to support sending UDP packets with exactly the same source address as they were coming
* to us, as required by rfc2181. Although this is a sensible requirement, it is slightly more difficult
* and may introduce portability issues.
*
* The argument is not used. The loop allocates a fresh udp_buf_t for every datagram; when a handler thread
* is created successfully the buffer is handed to it, otherwise it is freed here. The loop is only left
* when the buffer allocation fails; the socket is then closed and NULL is returned.
*/
void *udp_server_thread(void *dummy)
{
int sock;
ssize_t qlen;
pthread_t pt;
udp_buf_t *buf;
struct msghdr msg;
struct iovec v;
struct cmsghdr *cmsg;
/* Receive buffer for ancillary data (destination address information). */
char ctrl[512];
#if defined(ENABLE_IPV6) && (TARGET==TARGET_LINUX)
struct in_pktinfo sip;
#endif
/* (void)dummy; */ /* To inhibit "unused variable" warning */
THREAD_SIGINIT;
if (!global.strict_suid) {
if (!run_as(global.run_as)) {
pdnsd_exit();
}
}
sock=udp_socket;
while (1) {
/* Snapshot of the configured buffer size, used for both the allocation and the iovec length. */
int udpbufsize= global.udpbufsize;
if (!(buf=(udp_buf_t *)pdnsd_calloc(1,sizeof(udp_buf_t)+udpbufsize))) {
if (++da_mem_errs<=MEM_MAX_ERRS) {
log_error("Out of memory in request handling.");
}
break;
}
buf->sock=sock;
v.iov_base=(char *)buf->buf;
v.iov_len=udpbufsize;
msg.msg_iov=&v;
msg.msg_iovlen=1;
#if (TARGET!=TARGET_CYGWIN)
msg.msg_control=ctrl;
msg.msg_controllen=sizeof(ctrl);
#endif
#if defined(SRC_ADDR_DISC)
# ifdef ENABLE_IPV4
if (run_ipv4) {
msg.msg_name=&buf->addr.sin4;
msg.msg_namelen=sizeof(struct sockaddr_in);
if ((qlen=recvmsg(sock,&msg,0))>=0) {
/* Search the control messages for the one carrying the packet's destination info. */
cmsg=CMSG_FIRSTHDR(&msg);
while(cmsg) {
# if (TARGET==TARGET_LINUX)
if (cmsg->cmsg_level==SOL_IP && cmsg->cmsg_type==IP_PKTINFO) {
memcpy(&buf->pi.pi4,CMSG_DATA(cmsg),sizeof(struct in_pktinfo));
break;
}
# else
if (cmsg->cmsg_level==IPPROTO_IP && cmsg->cmsg_type==IP_RECVDSTADDR) {
memcpy(&buf->pi.ai4,CMSG_DATA(cmsg),sizeof(buf->pi.ai4));
break;
}
# endif
cmsg=CMSG_NXTHDR(&msg,cmsg);
}
/* cmsg is NULL here only if no matching control message was found: drop the packet. */
if (!cmsg) {
if (++da_udp_errs<=UDP_MAX_ERRS) {
log_error("Could not discover udp destination address");
}
goto free_buf_continue;
}
} else if (errno!=EINTR) {
if (++da_udp_errs<=UDP_MAX_ERRS) {
log_error("error in UDP recv: %s", strerror(errno));
}
}
}
# endif
# ifdef ENABLE_IPV6
ELSE_IPV6 {
msg.msg_name=&buf->addr.sin6;
msg.msg_namelen=sizeof(struct sockaddr_in6);
if ((qlen=recvmsg(sock,&msg,0))>=0) {
/* First pass: look for IPv6 packet info. */
cmsg=CMSG_FIRSTHDR(&msg);
while(cmsg) {
if (cmsg->cmsg_level==SOL_IPV6 && cmsg->cmsg_type==IPV6_PKTINFO) {
memcpy(&buf->pi.pi6,CMSG_DATA(cmsg),sizeof(struct in6_pktinfo));
break;
}
cmsg=CMSG_NXTHDR(&msg,cmsg);
}
if (!cmsg) {
/* We might have an IPv4 Packet incoming on our IPv6 port, so we also have to
* check for IPv4 sender addresses */
cmsg=CMSG_FIRSTHDR(&msg);
while(cmsg) {
# if (TARGET==TARGET_LINUX)
if (cmsg->cmsg_level==SOL_IP && cmsg->cmsg_type==IP_PKTINFO) {
/* Store the IPv4 info in IPv6 form (address via IPV6_MAPIPV4, same interface index). */
memcpy(&sip,CMSG_DATA(cmsg),sizeof(sip));
IPV6_MAPIPV4(&sip.ipi_addr,&buf->pi.pi6.ipi6_addr);
buf->pi.pi6.ipi6_ifindex=sip.ipi_ifindex;
break;
}
/* FIXME: What about BSD? probably ok, but... */
# endif
cmsg=CMSG_NXTHDR(&msg,cmsg);
}
if (!cmsg) {
if (++da_udp_errs<=UDP_MAX_ERRS) {
log_error("Could not discover udp destination address");
}
goto free_buf_continue;
}
}
} else if (errno!=EINTR) {
if (++da_udp_errs<=UDP_MAX_ERRS) {
log_error("error in UDP recv: %s", strerror(errno));
}
}
}
# endif
#else /* !SRC_ADDR_DISC */
# ifdef ENABLE_IPV4
if (run_ipv4) {
msg.msg_name=&buf->addr.sin4;
msg.msg_namelen=sizeof(struct sockaddr_in);
}
# endif
# ifdef ENABLE_IPV6
ELSE_IPV6 {
msg.msg_name=&buf->addr.sin6;
msg.msg_namelen=sizeof(struct sockaddr_in6);
}
# endif
qlen=recvmsg(sock,&msg,0);
if (qlen<0 && errno!=EINTR) {
if (++da_udp_errs<=UDP_MAX_ERRS) {
log_error("error in UDP recv: %s", strerror(errno));
}
}
#endif /* SRC_ADDR_DISC */
if (qlen>=0) {
pthread_mutex_lock(&proc_lock);
/* NOTE(review): the next line looks truncated -- "if (qprocslen=qlen;" is not valid C. Text between a
 * comparison on qprocs and the store to buf->len appears to have been lost; judging from the code below
 * it presumably declared err, incremented qprocs and spawned, and released proc_lock.
 * Restore it from the upstream source rather than guessing. */
if (qprocslen=qlen;
/* On success the new thread owns buf; go straight to the next datagram. */
err=pthread_create(&pt,&attr_detached,udp_answer_thread,(void *)buf);
if(err==0)
continue;
if(++da_thrd_errs<=THRD_MAX_ERRS)
log_warn("pthread_create failed: %s",strerror(err));
/* If thread creation failed, free resources associated with it. */
pthread_mutex_lock(&proc_lock);
--qprocs; --spawned;
}
/* Reached with proc_lock held: count the query as dropped. */
++dropped;
pthread_mutex_unlock(&proc_lock);
}
free_buf_continue:
/* The buffer was not handed to a thread: release it and pause before the next iteration. */
pdnsd_free(buf);
usleep_r(50000);
}
/* Only reached through the break on allocation failure above. */
udp_socket=-1;
close(sock);
udps_thrid=main_thrid;
/* If tcp_socket is -1 as well, shut the daemon down. */
if (tcp_socket==-1)
pdnsd_exit();
return NULL;
}
#ifndef NO_TCP_SERVER
/*
 * Cleanup handler pushed by tcp_answer_thread. csock points to an int
 * holding the connection's socket descriptor: the descriptor is closed,
 * the int is released with pdnsd_free() and the process counters are
 * decreased.
 */
static void tcp_answer_thread_cleanup(void *csock)
{
	int *fdp = (int *)csock;

	close(*fdp);
	pdnsd_free(csock);
	decrease_procs();
}
/*
* Process a dns query via tcp. The argument is a pointer to the socket.
* tcp_answer_thread_cleanup is pushed as a cleanup handler for csock, so the socket is closed and the
* pointer released both on the normal return path and when the thread leaves through pthread_exit().
* Always returns NULL.
* NOTE(review): two lines in this function look truncated (marked below); the visible text is therefore
* not a complete, compilable definition.
*/
static void *tcp_answer_thread(void *csock)
{
/* XXX: This should be OK, the original must be (and is) aligned */
int sock=*((int *)csock);
unsigned thrid;
pthread_cleanup_push(tcp_answer_thread_cleanup, csock);
THREAD_SIGINIT;
if (!global.strict_suid) {
if (!run_as(global.run_as)) {
pdnsd_exit();
}
}
for(;;) {
pthread_mutex_lock(&proc_lock);
/* NOTE(review): the next line looks truncated -- "if (procs0" is not valid C. The text between the
 * comparison on procs and a "#if DEBUG>0" guard (matching the #endif below) appears to have been lost,
 * presumably including the end of this loop, the release of proc_lock and the assignment of thrid.
 * Restore it from the upstream source rather than guessing. */
if (procs0
if(debug_p) {
int err;
/* Make the thread id available as thread-specific data; a failure is only logged. */
if ((err=pthread_setspecific(thrid_key, &thrid)) != 0) {
if(++da_misc_errs<=MISC_MAX_ERRS)
log_error("pthread_setspecific failed: %s",strerror(err));
/* pdnsd_exit(); */
}
}
#endif
#ifdef TCP_SUBSEQ
/* rfc1035 says we should process multiple queries in succession, so we are looping until
* the socket is closed by the other side or by tcp timeout.
* This in fact makes DoSing easier. If that is your concern, you should disable pdnsd's
* TCP server.*/
for(;;)
#endif
{
int rlen,olen;
size_t nlen;
unsigned char *buf;
dns_msg_t *resp;
#ifdef NO_POLL
fd_set fds;
struct timeval tv;
FD_ZERO(&fds);
/* NOTE(review): the next line looks truncated -- it starts inside a PDNSD_ASSERT on sock and ends inside
 * a test "...=2) {" on what is presumably olen. Everything in between is missing from this view; judging
 * from the variables declared above and the statements below, that presumably covered waiting for input,
 * reading the message length into rlen, allocating buf (with a cleanup handler matching the
 * "free(buf)" pop further down) and the start of the read loop that defines rv.
 * Restore it from the upstream source rather than guessing. */
PDNSD_ASSERT(sock=2) { /* We need the id to send a valid reply. */
dns_msg_t err;
/* Header-only FORMAT error reply, echoing the id (and the opcode, if at least 3 bytes were read). */
mk_error_reply(((dns_hdr_t*)buf)->id,
olen>=3?((dns_hdr_t*)buf)->opcode:OP_QUERY,
RC_FORMAT,
&err.hdr);
err.len=htons(sizeof(dns_hdr_t));
write_all(sock,&err,sizeof(err)); /* error anyway. */
}
pthread_exit(NULL); /* buf freed and socket closed by cleanup handlers */
}
olen += rv;
}
nlen=rlen;
/* No UDP size limit and no rcode output are needed for TCP, hence the two NULL arguments. */
if (!(resp=process_query(buf,&nlen,NULL,NULL))) {
/*
* A return value of NULL is a fatal error that prohibits even the sending of an error message.
* logging is already done. Just exit the thread now.
*/
pthread_exit(NULL);
}
pthread_cleanup_pop(1); /* free(buf) */
pthread_cleanup_push(free,resp);
{
int err; size_t rsize;
/* Store the reply length in network byte order in resp->len; the write covers dnsmsghdroffset+nlen bytes starting at resp. */
resp->len=htons(nlen);
rsize=dnsmsghdroffset+nlen;
if ((err=write_all(sock,resp,rsize))!=rsize) {
DEBUG_MSG("Error while writing to TCP client: %s\n",err==-1?strerror(errno):"unknown error");
pthread_exit(NULL); /* resp is freed and socket is closed by cleanup handlers */
}
}
pthread_cleanup_pop(1); /* free(resp) */
}
/* socket is closed by cleanup handler */
pthread_cleanup_pop(1);
return NULL;
}
/*
 * Create the TCP server socket, try to enable SO_REUSEADDR on it and bind
 * it to the address and port taken from the global configuration.
 * A failure to set SO_REUSEADDR is only logged as a warning.
 * Returns the socket descriptor, or -1 after logging an error.
 */
int init_tcp_socket()
{
	int sock;
	int reuse=1;
	union {
#ifdef ENABLE_IPV4
		struct sockaddr_in sin4;
#endif
#ifdef ENABLE_IPV6
		struct sockaddr_in6 sin6;
#endif
	} sin;
	socklen_t sinl;

#ifdef ENABLE_IPV4
	if (run_ipv4) {
		sock=socket(PF_INET,SOCK_STREAM,IPPROTO_TCP);
		if (sock==-1) {
			log_error("Could not open tcp socket: %s",strerror(errno));
			return -1;
		}
		memset(&sin.sin4,0,sizeof(struct sockaddr_in));
		sin.sin4.sin_family=AF_INET;
		sin.sin4.sin_port=htons(global.port);
		sin.sin4.sin_addr=global.a.ipv4;
		SET_SOCKA_LEN4(sin.sin4);
		sinl=sizeof(struct sockaddr_in);
	}
#endif
#ifdef ENABLE_IPV6
	ELSE_IPV6 {
		sock=socket(PF_INET6,SOCK_STREAM,IPPROTO_TCP);
		if (sock==-1) {
			log_error("Could not open tcp socket: %s",strerror(errno));
			return -1;
		}
		memset(&sin.sin6,0,sizeof(struct sockaddr_in6));
		sin.sin6.sin6_family=AF_INET6;
		sin.sin6.sin6_port=htons(global.port);
		sin.sin6.sin6_flowinfo=IPV6_FLOWINFO;
		sin.sin6.sin6_addr=global.a.ipv6;
		SET_SOCKA_LEN6(sin.sin6);
		sinl=sizeof(struct sockaddr_in6);
	}
#endif

	/*
	 * SO_REUSEADDR tells the kernel to let us bind the port even while it is
	 * still busy in the TIME_WAIT state; a port busy in any other state should
	 * still give an "address already in use" error. This helps when pdnsd is
	 * shut down and restarted right away while sockets are still active on
	 * its port. There is a slight risk that unexpected data comes in and
	 * confuses pdnsd, but that is possible rather than likely.
	 */
	if (setsockopt(sock,SOL_SOCKET,SO_REUSEADDR,&reuse,sizeof(reuse))!=0) {
		log_warn("Could not set options on tcp socket: %s",strerror(errno));
	}

	if (bind(sock,(struct sockaddr *)&sin,sinl)!=0) {
		log_error("Could not bind tcp socket: %s",strerror(errno));
		close(sock);
		return -1;
	}
	return sock;
}
/*
* Listen on the specified port for tcp connects and answer them (each in a new thread to be nonblocking)
*/
void *tcp_server_thread(void *p)
{
int sock;
pthread_t pt;
int *csock;
/* (void)p; */ /* To inhibit "unused variable" warning */
THREAD_SIGINIT;
if (!global.strict_suid) {
if (!run_as(global.run_as)) {
pdnsd_exit();
}
}
sock=tcp_socket;
if (listen(sock,5)) {
if (++da_tcp_errs<=TCP_MAX_ERRS) {
log_error("Could not listen on tcp socket: %s",strerror(errno));
}
goto close_sock_return;
}
while (1) {
if (!(csock=(int *)pdnsd_malloc(sizeof(int)))) {
if (++da_mem_errs<=MEM_MAX_ERRS) {
log_error("Out of memory in request handling.");
}
break;
}
if ((*csock=accept(sock,NULL,0))==-1) {
if (errno!=EINTR && ++da_tcp_errs<=TCP_MAX_ERRS) {
log_error("tcp accept failed: %s",strerror(errno));
}
} else {
/*
* With creating a new thread, we follow recommendations
* in rfc1035 not to block
*/
pthread_mutex_lock(&proc_lock);
if (qprocs