You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

5339 lines
192 KiB

  1. /*
  2. * This file is part of PowerDNS or dnsdist.
  3. * Copyright -- PowerDNS.COM B.V. and its contributors
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of version 2 of the GNU General Public License as
  7. * published by the Free Software Foundation.
  8. *
  9. * In addition, for the avoidance of any doubt, permission is granted to
  10. * link this program with OpenSSL and to (re)distribute the binaries
  11. * produced as the result of such linking.
  12. *
  13. * This program is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License
  19. * along with this program; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  21. */
  22. #ifdef HAVE_CONFIG_H
  23. #include "config.h"
  24. #endif
  25. #include <netdb.h>
  26. #include <sys/stat.h>
  27. #include <unistd.h>
  28. #ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
  29. #include <boost/container/flat_set.hpp>
  30. #endif
  31. #include "ws-recursor.hh"
  32. #include <thread>
  33. #include "threadname.hh"
  34. #include "recpacketcache.hh"
  35. #include "utility.hh"
  36. #include "dns_random.hh"
  37. #ifdef HAVE_LIBSODIUM
  38. #include <sodium.h>
  39. #endif
  40. #include "opensslsigners.hh"
  41. #include <iostream>
  42. #include <errno.h>
  43. #include <boost/static_assert.hpp>
  44. #include <map>
  45. #include <set>
  46. #include "recursor_cache.hh"
  47. #include "cachecleaner.hh"
  48. #include <stdio.h>
  49. #include <signal.h>
  50. #include <stdlib.h>
  51. #include "misc.hh"
  52. #include "mtasker.hh"
  53. #include <utility>
  54. #include "arguments.hh"
  55. #include "syncres.hh"
  56. #include <fcntl.h>
  57. #include <fstream>
  58. #include "sortlist.hh"
  59. #include "sstuff.hh"
  60. #include <boost/tuple/tuple.hpp>
  61. #include <boost/tuple/tuple_comparison.hpp>
  62. #include <boost/shared_array.hpp>
  63. #include <boost/function.hpp>
  64. #include <boost/algorithm/string.hpp>
  65. #ifdef MALLOC_TRACE
  66. #include "malloctrace.hh"
  67. #endif
  68. #include <netinet/tcp.h>
  69. #include "capabilities.hh"
  70. #include "dnsparser.hh"
  71. #include "dnswriter.hh"
  72. #include "dnsrecords.hh"
  73. #include "zoneparser-tng.hh"
  74. #include "rec_channel.hh"
  75. #include "logger.hh"
  76. #include "iputils.hh"
  77. #include "mplexer.hh"
  78. #include "config.h"
  79. #include "lua-recursor4.hh"
  80. #include "version.hh"
  81. #include "responsestats.hh"
  82. #include "secpoll-recursor.hh"
  83. #include "dnsname.hh"
  84. #include "filterpo.hh"
  85. #include "rpzloader.hh"
  86. #include "validate-recursor.hh"
  87. #include "rec-lua-conf.hh"
  88. #include "ednsoptions.hh"
  89. #include "gettime.hh"
  90. #include "proxy-protocol.hh"
  91. #include "pubsuffix.hh"
  92. #include "shuffle.hh"
  93. #ifdef NOD_ENABLED
  94. #include "nod.hh"
  95. #endif /* NOD_ENABLED */
  96. #include "query-local-address.hh"
  97. #include "rec-protobuf.hh"
  98. #include "rec-snmp.hh"
  99. #ifdef HAVE_SYSTEMD
  100. #include <systemd/sd-daemon.h>
  101. #endif
  102. #include "namespaces.hh"
  103. #ifdef HAVE_PROTOBUF
  104. #include "uuid-utils.hh"
  105. #endif /* HAVE_PROTOBUF */
  106. #include "xpf.hh"
  107. typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
  108. static thread_local std::shared_ptr<RecursorLua4> t_pdl;
  109. static thread_local unsigned int t_id = 0;
  110. static thread_local std::shared_ptr<Regex> t_traceRegex;
  111. static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
  112. #ifdef HAVE_PROTOBUF
  113. static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_protobufServers{nullptr};
  114. static thread_local uint64_t t_protobufServersGeneration;
  115. static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_outgoingProtobufServers{nullptr};
  116. static thread_local uint64_t t_outgoingProtobufServersGeneration;
  117. #endif /* HAVE_PROTOBUF */
  118. #ifdef HAVE_FSTRM
  119. static thread_local std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>> t_frameStreamServers{nullptr};
  120. static thread_local uint64_t t_frameStreamServersGeneration;
  121. #endif /* HAVE_FSTRM */
  122. thread_local std::unique_ptr<MT_t> MT; // the big MTasker
  123. std::unique_ptr<MemRecursorCache> s_RC;
  124. thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
  125. thread_local FDMultiplexer* t_fdm{nullptr};
  126. thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes;
  127. thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring, t_bogusqueryring;
  128. thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
  129. #ifdef NOD_ENABLED
  130. thread_local std::shared_ptr<nod::NODDB> t_nodDBp;
  131. thread_local std::shared_ptr<nod::UniqueResponseDB> t_udrDBp;
  132. #endif /* NOD_ENABLED */
  133. __thread struct timeval g_now; // timestamp, updated (too) frequently
  134. typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
  135. // for communicating with our threads
  136. // effectively readonly after startup
  137. struct RecThreadInfo
  138. {
  139. struct ThreadPipeSet
  140. {
  141. int writeToThread{-1};
  142. int readToThread{-1};
  143. int writeFromThread{-1};
  144. int readFromThread{-1};
  145. int writeQueriesToThread{-1}; // this one is non-blocking
  146. int readQueriesToThread{-1};
  147. };
  148. /* FD corresponding to TCP sockets this thread is listening
  149. on.
  150. These FDs are also in deferredAdds when we have one
  151. socket per listener, and in g_deferredAdds instead. */
  152. std::set<int> tcpSockets;
  153. /* FD corresponding to listening sockets if we have one socket per
  154. listener (with reuseport), otherwise all listeners share the
  155. same FD and g_deferredAdds is then used instead */
  156. deferredAdd_t deferredAdds;
  157. struct ThreadPipeSet pipes;
  158. std::thread thread;
  159. MT_t* mt{nullptr};
  160. uint64_t numberOfDistributedQueries{0};
  161. int exitCode{0};
  162. /* handle the web server, carbon, statistics and the control channel */
  163. bool isHandler{false};
  164. /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
  165. bool isListener{false};
  166. /* process queries */
  167. bool isWorker{false};
  168. };
  169. /* first we have the handler thread, t_id == 0 (some other
  170. helper threads like SNMP might have t_id == 0 as well)
  171. then the distributor threads if any
  172. and finally the workers */
  173. static std::vector<RecThreadInfo> s_threadInfos;
  174. /* without reuseport, all listeners share the same sockets */
  175. static deferredAdd_t g_deferredAdds;
  176. typedef vector<int> tcpListenSockets_t;
  177. typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
  178. static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
  179. static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
  180. static AtomicCounter counter;
  181. static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
  182. static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
  183. static NetmaskGroup g_XPFAcl;
  184. static NetmaskGroup g_proxyProtocolACL;
  185. static boost::optional<ComboAddress> g_dns64Prefix{boost::none};
  186. static DNSName g_dns64PrefixReverse;
  187. static size_t g_proxyProtocolMaximumSize;
  188. static size_t g_tcpMaxQueriesPerConn;
  189. static size_t s_maxUDPQueriesPerRound;
  190. static uint64_t g_latencyStatSize;
  191. static uint32_t g_disthashseed;
  192. static unsigned int g_maxTCPPerClient;
  193. static unsigned int g_maxMThreads;
  194. static unsigned int g_numDistributorThreads;
  195. static unsigned int g_numWorkerThreads;
  196. static int g_tcpTimeout;
  197. static uint16_t g_udpTruncationThreshold;
  198. static uint16_t g_xpfRRCode{0};
  199. static std::atomic<bool> statsWanted;
  200. static std::atomic<bool> g_quiet;
  201. static bool g_logCommonErrors;
  202. static bool g_anyToTcp;
  203. static bool g_weDistributeQueries; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
  204. static bool g_reusePort{false};
  205. static bool g_gettagNeedsEDNSOptions{false};
  206. static time_t g_statisticsInterval;
  207. static bool g_useIncomingECS;
  208. static bool g_useKernelTimestamp;
  209. std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
  210. #ifdef NOD_ENABLED
  211. static bool g_nodEnabled;
  212. static DNSName g_nodLookupDomain;
  213. static bool g_nodLog;
  214. static SuffixMatchNode g_nodDomainWL;
  215. static std::string g_nod_pbtag;
  216. static bool g_udrEnabled;
  217. static bool g_udrLog;
  218. static std::string g_udr_pbtag;
  219. #endif /* NOD_ENABLED */
  220. #ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
  221. static boost::container::flat_set<uint16_t> s_avoidUdpSourcePorts;
  222. #else
  223. static std::set<uint16_t> s_avoidUdpSourcePorts;
  224. #endif
  225. static uint16_t s_minUdpSourcePort;
  226. static uint16_t s_maxUdpSourcePort;
  227. static double s_balancingFactor;
  228. RecursorControlChannel s_rcc; // only active in the handler thread
  229. RecursorStats g_stats;
  230. string s_programname="pdns_recursor";
  231. string s_pidfname;
  232. bool g_lowercaseOutgoing;
  233. unsigned int g_networkTimeoutMsec;
  234. unsigned int g_numThreads;
  235. uint16_t g_outgoingEDNSBufsize;
  236. bool g_logRPZChanges{false};
  237. // Used in the Syncres to not throttle certain servers
  238. GlobalStateHolder<SuffixMatchNode> g_dontThrottleNames;
  239. GlobalStateHolder<NetmaskGroup> g_dontThrottleNetmasks;
  240. #define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
  241. #define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
  242. // Bad Nets taken from both:
  243. // http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
  244. // and
  245. // http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
  246. // where such a network may not be considered a valid destination
  247. #define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
  248. #define DONT_QUERY LOCAL_NETS ", " BAD_NETS
  249. //! used to send information to a newborn mthread
  250. struct DNSComboWriter {
  251. DNSComboWriter(const std::string& query, const struct timeval& now): d_mdp(true, query), d_now(now), d_query(query)
  252. {
  253. }
  254. DNSComboWriter(const std::string& query, const struct timeval& now, std::unordered_set<std::string>&& policyTags, LuaContext::LuaObject&& data, std::vector<DNSRecord>&& records): d_mdp(true, query), d_now(now), d_query(query), d_policyTags(std::move(policyTags)), d_records(std::move(records)), d_data(std::move(data))
  255. {
  256. }
  257. void setRemote(const ComboAddress& sa)
  258. {
  259. d_remote=sa;
  260. }
  261. void setSource(const ComboAddress& sa)
  262. {
  263. d_source=sa;
  264. }
  265. void setLocal(const ComboAddress& sa)
  266. {
  267. d_local=sa;
  268. }
  269. void setDestination(const ComboAddress& sa)
  270. {
  271. d_destination=sa;
  272. }
  273. void setSocket(int sock)
  274. {
  275. d_socket=sock;
  276. }
  277. string getRemote() const
  278. {
  279. if (d_source == d_remote) {
  280. return d_source.toStringWithPort();
  281. }
  282. return d_source.toStringWithPort() + " (proxied by " + d_remote.toStringWithPort() + ")";
  283. }
  284. std::vector<ProxyProtocolValue> d_proxyProtocolValues;
  285. MOADNSParser d_mdp;
  286. struct timeval d_now;
  287. /* Remote client, might differ from d_source
  288. in case of XPF, in which case d_source holds
  289. the IP of the client and d_remote of the proxy
  290. */
  291. ComboAddress d_remote;
  292. ComboAddress d_source;
  293. /* Destination address, might differ from
  294. d_destination in case of XPF, in which case
  295. d_destination holds the IP of the proxy and
  296. d_local holds our own. */
  297. ComboAddress d_local;
  298. ComboAddress d_destination;
  299. #ifdef HAVE_PROTOBUF
  300. boost::uuids::uuid d_uuid;
  301. string d_requestorId;
  302. string d_deviceId;
  303. string d_deviceName;
  304. struct timeval d_kernelTimestamp{0,0};
  305. #endif
  306. std::string d_query;
  307. std::unordered_set<std::string> d_policyTags;
  308. std::string d_routingTag;
  309. std::vector<DNSRecord> d_records;
  310. LuaContext::LuaObject d_data;
  311. EDNSSubnetOpts d_ednssubnet;
  312. shared_ptr<TCPConnection> d_tcpConnection;
  313. boost::optional<int> d_rcode{boost::none};
  314. int d_socket{-1};
  315. unsigned int d_tag{0};
  316. uint32_t d_qhash{0};
  317. uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
  318. uint16_t d_ecsBegin{0};
  319. uint16_t d_ecsEnd{0};
  320. bool d_variable{false};
  321. bool d_ecsFound{false};
  322. bool d_ecsParsed{false};
  323. bool d_followCNAMERecords{false};
  324. bool d_logResponse{false};
  325. bool d_tcp{false};
  326. };
  327. MT_t* getMT()
  328. {
  329. return MT ? MT.get() : nullptr;
  330. }
  331. ArgvMap &arg()
  332. {
  333. static ArgvMap theArg;
  334. return theArg;
  335. }
  336. unsigned int getRecursorThreadId()
  337. {
  338. return t_id;
  339. }
  340. static bool isDistributorThread()
  341. {
  342. if (t_id == 0) {
  343. return false;
  344. }
  345. return g_weDistributeQueries && s_threadInfos.at(t_id).isListener;
  346. }
  347. static bool isHandlerThread()
  348. {
  349. if (t_id == 0) {
  350. return true;
  351. }
  352. return s_threadInfos.at(t_id).isHandler;
  353. }
  354. static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
  355. // -1 is error, 0 is timeout, 1 is success
  356. int asendtcp(const string& data, Socket* sock)
  357. {
  358. PacketID pident;
  359. pident.sock=sock;
  360. pident.outMSG=data;
  361. t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
  362. string packet;
  363. int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
  364. if(!ret || ret==-1) { // timeout
  365. t_fdm->removeWriteFD(sock->getHandle());
  366. }
  367. else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
  368. return -1;
  369. }
  370. return ret;
  371. }
  372. static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
  373. // -1 is error, 0 is timeout, 1 is success
  374. int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
  375. {
  376. data.clear();
  377. PacketID pident;
  378. pident.sock=sock;
  379. pident.inNeeded=len;
  380. pident.inIncompleteOkay=incompleteOkay;
  381. t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
  382. int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
  383. if(!ret || ret==-1) { // timeout
  384. t_fdm->removeReadFD(sock->getHandle());
  385. }
  386. else if(data.empty()) {// error, EOF or other
  387. return -1;
  388. }
  389. return ret;
  390. }
  391. static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
  392. {
  393. PacketID pident=*any_cast<PacketID>(&var);
  394. char resp[512];
  395. ComboAddress fromaddr;
  396. socklen_t addrlen=sizeof(fromaddr);
  397. ssize_t ret=recvfrom(fd, resp, sizeof(resp), 0, (sockaddr *)&fromaddr, &addrlen);
  398. if (fromaddr != pident.remote) {
  399. g_log<<Logger::Notice<<"Response received from the wrong remote host ("<<fromaddr.toStringWithPort()<<" instead of "<<pident.remote.toStringWithPort()<<"), discarding"<<endl;
  400. }
  401. t_fdm->removeReadFD(fd);
  402. if(ret >= 0) {
  403. string data(resp, (size_t) ret);
  404. MT->sendEvent(pident, &data);
  405. }
  406. else {
  407. string empty;
  408. MT->sendEvent(pident, &empty);
  409. // cerr<<"Had some kind of error: "<<ret<<", "<<stringerror()<<endl;
  410. }
  411. }
  412. string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
  413. {
  414. Socket s(dest.sin4.sin_family, SOCK_DGRAM);
  415. s.setNonBlocking();
  416. ComboAddress local = pdns::getQueryLocalAddress(dest.sin4.sin_family, 0);
  417. s.bind(local);
  418. s.connect(dest);
  419. s.send(query);
  420. PacketID pident;
  421. pident.sock=&s;
  422. pident.remote=dest;
  423. pident.type=0;
  424. t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
  425. string data;
  426. int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
  427. if(!ret || ret==-1) { // timeout
  428. t_fdm->removeReadFD(s.getHandle());
  429. }
  430. else if(data.empty()) {// error, EOF or other
  431. // we could special case this
  432. return data;
  433. }
  434. return data;
  435. }
  436. static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
  437. static void setSocketBuffer(int fd, int optname, uint32_t size)
  438. {
  439. uint32_t psize=0;
  440. socklen_t len=sizeof(psize);
  441. if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
  442. g_log<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
  443. return;
  444. }
  445. if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0) {
  446. int err = errno;
  447. g_log << Logger::Error << "Unable to raise socket buffer size to " << size << ": " << stringerror(err) << endl;
  448. }
  449. }
  450. static void setSocketReceiveBuffer(int fd, uint32_t size)
  451. {
  452. setSocketBuffer(fd, SO_RCVBUF, size);
  453. }
  454. static void setSocketSendBuffer(int fd, uint32_t size)
  455. {
  456. setSocketBuffer(fd, SO_SNDBUF, size);
  457. }
  458. // you can ask this class for a UDP socket to send a query from
  459. // this socket is not yours, don't even think about deleting it
  460. // but after you call 'returnSocket' on it, don't assume anything anymore
  461. class UDPClientSocks
  462. {
  463. unsigned int d_numsocks;
  464. public:
  465. UDPClientSocks() : d_numsocks(0)
  466. {
  467. }
  468. // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
  469. int getSocket(const ComboAddress& toaddr, int* fd)
  470. {
  471. *fd=makeClientSocket(toaddr.sin4.sin_family);
  472. if(*fd < 0) // temporary error - receive exception otherwise
  473. return -2;
  474. if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
  475. int err = errno;
  476. try {
  477. closesocket(*fd);
  478. }
  479. catch(const PDNSException& e) {
  480. g_log<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
  481. }
  482. if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
  483. return -2;
  484. return -1;
  485. }
  486. d_numsocks++;
  487. return 0;
  488. }
  489. // return a socket to the pool, or simply erase it
  490. void returnSocket(int fd)
  491. {
  492. try {
  493. t_fdm->removeReadFD(fd);
  494. }
  495. catch(const FDMultiplexerException& e) {
  496. // we sometimes return a socket that has not yet been assigned to t_fdm
  497. }
  498. try {
  499. closesocket(fd);
  500. }
  501. catch(const PDNSException& e) {
  502. g_log<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
  503. }
  504. --d_numsocks;
  505. }
  506. private:
  507. // returns -1 for errors which might go away, throws for ones that won't
  508. static int makeClientSocket(int family)
  509. {
  510. int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
  511. if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
  512. return ret;
  513. if(ret<0)
  514. throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
  515. // setCloseOnExec(ret); // we're not going to exec
  516. int tries=10;
  517. ComboAddress sin;
  518. while(--tries) {
  519. uint16_t port;
  520. if(tries==1) // fall back to kernel 'random'
  521. port = 0;
  522. else {
  523. do {
  524. port = s_minUdpSourcePort + dns_random(s_maxUdpSourcePort - s_minUdpSourcePort + 1);
  525. }
  526. while (s_avoidUdpSourcePorts.count(port));
  527. }
  528. sin=pdns::getQueryLocalAddress(family, port); // does htons for us
  529. if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
  530. break;
  531. }
  532. if(!tries) {
  533. closesocket(ret);
  534. throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
  535. }
  536. try {
  537. setReceiveSocketErrors(ret, family);
  538. setNonBlocking(ret);
  539. }
  540. catch(...) {
  541. closesocket(ret);
  542. throw;
  543. }
  544. return ret;
  545. }
  546. };
  547. static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
  548. /* these two functions are used by LWRes */
  549. // -2 is OS error, -1 is error that depends on the remote, > 0 is success
  550. int asendto(const char *data, size_t len, int flags,
  551. const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
  552. {
  553. PacketID pident;
  554. pident.domain = domain;
  555. pident.remote = toaddr;
  556. pident.type = qtype;
  557. // see if there is an existing outstanding request we can chain on to, using partial equivalence function
  558. pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
  559. for(; chain.first != chain.second; chain.first++) {
  560. if(chain.first->key.fd > -1 && !chain.first->key.closed) { // don't chain onto existing chained waiter or a chain already processed
  561. /*
  562. cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
  563. cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
  564. <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
  565. */
  566. chain.first->key.chain.insert(id); // we can chain
  567. *fd=-1; // gets used in waitEvent / sendEvent later on
  568. return 1;
  569. }
  570. }
  571. int ret=t_udpclientsocks->getSocket(toaddr, fd);
  572. if(ret < 0)
  573. return ret;
  574. pident.fd=*fd;
  575. pident.id=id;
  576. t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
  577. ret = send(*fd, data, len, 0);
  578. int tmp = errno;
  579. if(ret < 0)
  580. t_udpclientsocks->returnSocket(*fd);
  581. errno = tmp; // this is for logging purposes only
  582. return ret;
  583. }
  584. // -1 is error, 0 is timeout, 1 is success
  585. int arecvfrom(std::string& packet, int flags, const ComboAddress& fromaddr, size_t *d_len,
  586. uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
  587. {
  588. static optional<unsigned int> nearMissLimit;
  589. if(!nearMissLimit)
  590. nearMissLimit=::arg().asNum("spoof-nearmiss-max");
  591. PacketID pident;
  592. pident.fd=fd;
  593. pident.id=id;
  594. pident.domain=domain;
  595. pident.type = qtype;
  596. pident.remote=fromaddr;
  597. int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
  598. /* -1 means error, 0 means timeout, 1 means a result from handleUDPServerResponse() which might still be an error */
  599. if(ret > 0) {
  600. /* handleUDPServerResponse() will close the socket for us no matter what */
  601. if(packet.empty()) // means "error"
  602. return -1;
  603. *d_len=packet.size();
  604. if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
  605. g_log<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
  606. g_stats.spoofCount++;
  607. return -1;
  608. }
  609. }
  610. else {
  611. /* getting there means error or timeout, it's up to us to close the socket */
  612. if(fd >= 0)
  613. t_udpclientsocks->returnSocket(fd);
  614. }
  615. return ret;
  616. }
  617. static void writePid(void)
  618. {
  619. if(!::arg().mustDo("write-pid"))
  620. return;
  621. ofstream of(s_pidfname.c_str(), std::ios_base::app);
  622. if(of)
  623. of<< Utility::getpid() <<endl;
  624. else {
  625. int err = errno;
  626. g_log << Logger::Error << "Writing pid for " << Utility::getpid() << " to " << s_pidfname << " failed: "
  627. << stringerror(err) << endl;
  628. }
  629. }
  630. uint16_t TCPConnection::s_maxInFlight;
  631. TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : data(2, 0), d_remote(addr), d_fd(fd)
  632. {
  633. ++s_currentConnections;
  634. (*t_tcpClientCounts)[d_remote]++;
  635. }
  636. TCPConnection::~TCPConnection()
  637. {
  638. try {
  639. if(closesocket(d_fd) < 0)
  640. g_log<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
  641. }
  642. catch(const PDNSException& e) {
  643. g_log<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
  644. }
  645. if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
  646. t_tcpClientCounts->erase(d_remote);
  647. --s_currentConnections;
  648. }
  649. AtomicCounter TCPConnection::s_currentConnections;
  650. static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
  651. // the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
  652. static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
  653. {
  654. if(packetsize > 1000 && t_largeanswerremotes)
  655. t_largeanswerremotes->push_back(remote);
  656. switch(res) {
  657. case RCode::ServFail:
  658. if(t_servfailremotes) {
  659. t_servfailremotes->push_back(remote);
  660. if(query && t_servfailqueryring) // packet cache
  661. t_servfailqueryring->push_back(make_pair(*query, qtype));
  662. }
  663. g_stats.servFails++;
  664. break;
  665. case RCode::NXDomain:
  666. g_stats.nxDomains++;
  667. break;
  668. case RCode::NoError:
  669. g_stats.noErrors++;
  670. break;
  671. }
  672. }
  673. static string makeLoginfo(const std::unique_ptr<DNSComboWriter>& dc)
  674. try
  675. {
  676. return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->getRemote())+")";
  677. }
  678. catch(...)
  679. {
  680. return "Exception making error message for exception";
  681. }
  682. #ifdef HAVE_PROTOBUF
  683. static void protobufLogQuery(uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::unordered_set<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId, const std::string& deviceName)
  684. {
  685. if (!t_protobufServers) {
  686. return;
  687. }
  688. Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
  689. ComboAddress requestor = requestorNM.getMaskedNetwork();
  690. requestor.setPort(remote.getPort());
  691. RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
  692. message.setServerIdentity(SyncRes::s_serverID);
  693. message.setEDNSSubnet(ednssubnet, ednssubnet.isIPv4() ? maskV4 : maskV6);
  694. message.setRequestorId(requestorId);
  695. message.setDeviceId(deviceId);
  696. message.setDeviceName(deviceName);
  697. if (!policyTags.empty()) {
  698. message.setPolicyTags(policyTags);
  699. }
  700. // cerr <<message.toDebugString()<<endl;
  701. std::string str;
  702. message.serialize(str);
  703. for (auto& server : *t_protobufServers) {
  704. server->queueData(str);
  705. }
  706. }
  707. static void protobufLogResponse(const RecProtoBufMessage& message)
  708. {
  709. if (!t_protobufServers) {
  710. return;
  711. }
  712. // cerr <<message.toDebugString()<<endl;
  713. std::string str;
  714. message.serialize(str);
  715. for (auto& server : *t_protobufServers) {
  716. server->queueData(str);
  717. }
  718. }
  719. #endif
  720. /**
  721. * Chases the CNAME provided by the PolicyCustom RPZ policy.
  722. *
  723. * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
  724. * @param qtype: The QType of the original query
  725. * @param sr: A SyncRes
  726. * @param res: An integer that will contain the RCODE of the lookup we do
  727. * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
  728. */
  729. static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
  730. {
  731. if (spoofed.d_type == QType::CNAME) {
  732. bool oldWantsRPZ = sr.getWantsRPZ();
  733. sr.setWantsRPZ(false);
  734. vector<DNSRecord> ans;
  735. res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, QClass::IN, ans);
  736. for (const auto& rec : ans) {
  737. if(rec.d_place == DNSResourceRecord::ANSWER) {
  738. ret.push_back(rec);
  739. }
  740. }
  741. // Reset the RPZ state of the SyncRes
  742. sr.setWantsRPZ(oldWantsRPZ);
  743. }
  744. }
  745. static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
  746. {
  747. pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
  748. if(rec.d_type != QType::OPT) // their TTL ain't real
  749. minTTL = min(minTTL, rec.d_ttl);
  750. rec.d_content->toPacket(pw);
  751. if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
  752. pw.rollback();
  753. if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
  754. pw.getHeader()->tc=1;
  755. pw.truncate();
  756. }
  757. return false;
  758. }
  759. return true;
  760. }
  761. enum class PolicyResult : uint8_t { NoAction, HaveAnswer, Drop };
  762. static PolicyResult handlePolicyHit(const DNSFilterEngine::Policy& appliedPolicy, const std::unique_ptr<DNSComboWriter>& dc, SyncRes& sr, int& res, vector<DNSRecord>& ret, DNSPacketWriter& pw)
  763. {
  764. /* don't account truncate actions for TCP queries, since they are not applied */
  765. if (appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::Truncate || !dc->d_tcp) {
  766. ++g_stats.policyResults[appliedPolicy.d_kind];
  767. }
  768. if (sr.doLog() && appliedPolicy.d_type != DNSFilterEngine::PolicyType::None) {
  769. g_log << Logger::Warning << dc->d_mdp.d_qname << "|" << QType(dc->d_mdp.d_qtype).getName() << appliedPolicy.getLogString() << endl;
  770. }
  771. switch (appliedPolicy.d_kind) {
  772. case DNSFilterEngine::PolicyKind::NoAction:
  773. return PolicyResult::NoAction;
  774. case DNSFilterEngine::PolicyKind::Drop:
  775. ++g_stats.policyDrops;
  776. return PolicyResult::Drop;
  777. case DNSFilterEngine::PolicyKind::NXDOMAIN:
  778. ret.clear();
  779. res = RCode::NXDomain;
  780. return PolicyResult::HaveAnswer;
  781. case DNSFilterEngine::PolicyKind::NODATA:
  782. ret.clear();
  783. res = RCode::NoError;
  784. return PolicyResult::HaveAnswer;
  785. case DNSFilterEngine::PolicyKind::Truncate:
  786. if (!dc->d_tcp) {
  787. ret.clear();
  788. res = RCode::NoError;
  789. pw.getHeader()->tc = 1;
  790. return PolicyResult::HaveAnswer;
  791. }
  792. return PolicyResult::NoAction;
  793. case DNSFilterEngine::PolicyKind::Custom:
  794. res = RCode::NoError;
  795. {
  796. auto spoofed = appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
  797. for (auto& dr : spoofed) {
  798. ret.push_back(dr);
  799. try {
  800. handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
  801. }
  802. catch (const ImmediateServFailException& e) {
  803. if (g_logCommonErrors) {
  804. g_log << Logger::Notice << "Sending SERVFAIL to " << dc->getRemote() << " during resolve of the custom filter policy '" << appliedPolicy.getName() << "' while resolving '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
  805. }
  806. res = RCode::ServFail;
  807. break;
  808. }
  809. catch (const PolicyHitException& e) {
  810. if (g_logCommonErrors) {
  811. g_log << Logger::Notice << "Sending SERVFAIL to " << dc->getRemote() << " during resolve of the custom filter policy '" << appliedPolicy.getName() << "' while resolving '" << dc->d_mdp.d_qname << "' because another RPZ policy was hit" << endl;
  812. }
  813. res = RCode::ServFail;
  814. break;
  815. }
  816. }
  817. return PolicyResult::HaveAnswer;
  818. }
  819. }
  820. return PolicyResult::NoAction;
  821. }
  822. #ifdef HAVE_PROTOBUF
  823. static std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> startProtobufServers(const ProtobufExportConfig& config)
  824. {
  825. auto result = std::make_shared<std::vector<std::unique_ptr<RemoteLogger>>>();
  826. for (const auto& server : config.servers) {
  827. try {
  828. auto logger = make_unique<RemoteLogger>(server, config.timeout, 100*config.maxQueuedEntries, config.reconnectWaitTime, config.asyncConnect);
  829. logger->setLogQueries(config.logQueries);
  830. logger->setLogResponses(config.logResponses);
  831. result->emplace_back(std::move(logger));
  832. }
  833. catch(const std::exception& e) {
  834. g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.what()<<endl;
  835. }
  836. catch(const PDNSException& e) {
  837. g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.reason<<endl;
  838. }
  839. }
  840. return result;
  841. }
  842. static bool checkProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
  843. {
  844. if (!luaconfsLocal->protobufExportConfig.enabled) {
  845. if (t_protobufServers) {
  846. for (auto& server : *t_protobufServers) {
  847. server->stop();
  848. }
  849. t_protobufServers.reset();
  850. }
  851. return false;
  852. }
  853. /* if the server was not running, or if it was running according to a
  854. previous configuration */
  855. if (!t_protobufServers ||
  856. t_protobufServersGeneration < luaconfsLocal->generation) {
  857. if (t_protobufServers) {
  858. for (auto& server : *t_protobufServers) {
  859. server->stop();
  860. }
  861. }
  862. t_protobufServers.reset();
  863. t_protobufServers = startProtobufServers(luaconfsLocal->protobufExportConfig);
  864. t_protobufServersGeneration = luaconfsLocal->generation;
  865. }
  866. return true;
  867. }
  868. static bool checkOutgoingProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
  869. {
  870. if (!luaconfsLocal->outgoingProtobufExportConfig.enabled) {
  871. if (t_outgoingProtobufServers) {
  872. for (auto& server : *t_outgoingProtobufServers) {
  873. server->stop();
  874. }
  875. }
  876. t_outgoingProtobufServers.reset();
  877. return false;
  878. }
  879. /* if the server was not running, or if it was running according to a
  880. previous configuration */
  881. if (!t_outgoingProtobufServers ||
  882. t_outgoingProtobufServersGeneration < luaconfsLocal->generation) {
  883. if (t_outgoingProtobufServers) {
  884. for (auto& server : *t_outgoingProtobufServers) {
  885. server->stop();
  886. }
  887. }
  888. t_outgoingProtobufServers.reset();
  889. t_outgoingProtobufServers = startProtobufServers(luaconfsLocal->outgoingProtobufExportConfig);
  890. t_outgoingProtobufServersGeneration = luaconfsLocal->generation;
  891. }
  892. return true;
  893. }
  894. #ifdef HAVE_FSTRM
  895. static std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>> startFrameStreamServers(const FrameStreamExportConfig& config)
  896. {
  897. auto result = std::make_shared<std::vector<std::unique_ptr<FrameStreamLogger>>>();
  898. for (const auto& server : config.servers) {
  899. try {
  900. std::unordered_map<string,unsigned> options;
  901. options["bufferHint"] = config.bufferHint;
  902. options["flushTimeout"] = config.flushTimeout;
  903. options["inputQueueSize"] = config.inputQueueSize;
  904. options["outputQueueSize"] = config.outputQueueSize;
  905. options["queueNotifyThreshold"] = config.queueNotifyThreshold;
  906. options["reopenInterval"] = config.reopenInterval;
  907. FrameStreamLogger *fsl = nullptr;
  908. try {
  909. ComboAddress address(server);
  910. fsl = new FrameStreamLogger(address.sin4.sin_family, address.toStringWithPort(), true, options);
  911. }
  912. catch (const PDNSException& e) {
  913. fsl = new FrameStreamLogger(AF_UNIX, server, true, options);
  914. }
  915. fsl->setLogQueries(config.logQueries);
  916. fsl->setLogResponses(config.logResponses);
  917. result->emplace_back(fsl);
  918. }
  919. catch(const std::exception& e) {
  920. g_log<<Logger::Error<<"Error while starting dnstap framestream logger to '"<<server<<": "<<e.what()<<endl;
  921. }
  922. catch(const PDNSException& e) {
  923. g_log<<Logger::Error<<"Error while starting dnstap framestream logger to '"<<server<<": "<<e.reason<<endl;
  924. }
  925. }
  926. return result;
  927. }
  928. static bool checkFrameStreamExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
  929. {
  930. if (!luaconfsLocal->frameStreamExportConfig.enabled) {
  931. if (t_frameStreamServers) {
  932. // dt's take care of cleanup
  933. t_frameStreamServers.reset();
  934. }
  935. return false;
  936. }
  937. /* if the server was not running, or if it was running according to a
  938. previous configuration */
  939. if (!t_frameStreamServers ||
  940. t_frameStreamServersGeneration < luaconfsLocal->generation) {
  941. if (t_frameStreamServers) {
  942. // dt's take care of cleanup
  943. t_frameStreamServers.reset();
  944. }
  945. t_frameStreamServers = startFrameStreamServers(luaconfsLocal->frameStreamExportConfig);
  946. t_frameStreamServersGeneration = luaconfsLocal->generation;
  947. }
  948. return true;
  949. }
  950. #endif /* HAVE_FSTRM */
  951. #endif /* HAVE_PROTOBUF */
  952. #ifdef NOD_ENABLED
  953. static bool nodCheckNewDomain(const DNSName& dname)
  954. {
  955. bool ret = false;
  956. // First check the (sub)domain isn't whitelisted for NOD purposes
  957. if (!g_nodDomainWL.check(dname)) {
  958. // Now check the NODDB (note this is probabilistic so can have FNs/FPs)
  959. if (t_nodDBp && t_nodDBp->isNewDomain(dname)) {
  960. if (g_nodLog) {
  961. // This should probably log to a dedicated log file
  962. g_log<<Logger::Notice<<"Newly observed domain nod="<<dname<<endl;
  963. }
  964. ret = true;
  965. }
  966. }
  967. return ret;
  968. }
  969. static void sendNODLookup(const DNSName& dname)
  970. {
  971. if (!(g_nodLookupDomain.isRoot())) {
  972. // Send a DNS A query to <domain>.g_nodLookupDomain
  973. static const QType qt(QType::A);
  974. static const uint16_t qc(QClass::IN);
  975. DNSName qname;
  976. try {
  977. qname = dname + g_nodLookupDomain;
  978. }
  979. catch(const std::range_error &e) {
  980. return;
  981. }
  982. vector<DNSRecord> dummy;
  983. directResolve(qname, qt, qc, dummy);
  984. }
  985. }
  986. static bool udrCheckUniqueDNSRecord(const DNSName& dname, uint16_t qtype, const DNSRecord& record)
  987. {
  988. bool ret = false;
  989. if (record.d_place == DNSResourceRecord::ANSWER ||
  990. record.d_place == DNSResourceRecord::ADDITIONAL) {
  991. // Create a string that represent a triplet of (qname, qtype and RR[type, name, content])
  992. std::stringstream ss;
  993. ss << dname.toDNSStringLC() << ":" << qtype << ":" << qtype << ":" << record.d_type << ":" << record.d_name.toDNSStringLC() << ":" << record.d_content->getZoneRepresentation();
  994. if (t_udrDBp && t_udrDBp->isUniqueResponse(ss.str())) {
  995. if (g_udrLog) {
  996. // This should also probably log to a dedicated file.
  997. g_log<<Logger::Notice<<"Unique response observed: qname="<<dname<<" qtype="<<QType(qtype).getName()<< " rrtype=" << QType(record.d_type).getName() << " rrname=" << record.d_name << " rrcontent=" << record.d_content->getZoneRepresentation() << endl;
  998. }
  999. ret = true;
  1000. }
  1001. }
  1002. return ret;
  1003. }
  1004. #endif /* NOD_ENABLED */
  1005. int followCNAMERecords(vector<DNSRecord>& ret, const QType& qtype)
  1006. {
  1007. vector<DNSRecord> resolved;
  1008. DNSName target;
  1009. for(const DNSRecord& rr : ret) {
  1010. if(rr.d_type == QType::CNAME) {
  1011. auto rec = getRR<CNAMERecordContent>(rr);
  1012. if(rec) {
  1013. target=rec->getTarget();
  1014. break;
  1015. }
  1016. }
  1017. }
  1018. if(target.empty()) {
  1019. return 0;
  1020. }
  1021. int rcode = directResolve(target, qtype, QClass::IN, resolved);
  1022. for(DNSRecord& rr : resolved) {
  1023. ret.push_back(std::move(rr));
  1024. }
  1025. return rcode;
  1026. }
  1027. int getFakeAAAARecords(const DNSName& qname, ComboAddress prefix, vector<DNSRecord>& ret)
  1028. {
  1029. /* we pass a separate vector of records because we will be resolving the initial qname
  1030. again, possibly encountering the same CNAME(s), and we don't want to trigger the CNAME
  1031. loop detection. */
  1032. vector<DNSRecord> newRecords;
  1033. int rcode = directResolve(qname, QType(QType::A), QClass::IN, newRecords);
  1034. ret.reserve(ret.size() + newRecords.size());
  1035. for (auto& record : newRecords) {
  1036. ret.push_back(std::move(record));
  1037. }
  1038. // Remove double CNAME records
  1039. std::set<DNSName> seenCNAMEs;
  1040. ret.erase(std::remove_if(
  1041. ret.begin(),
  1042. ret.end(),
  1043. [&seenCNAMEs](DNSRecord& rr) {
  1044. if (rr.d_type == QType::CNAME) {
  1045. auto target = getRR<CNAMERecordContent>(rr);
  1046. if (target == nullptr) {
  1047. return false;
  1048. }
  1049. if (seenCNAMEs.count(target->getTarget()) > 0) {
  1050. // We've had this CNAME before, remove it
  1051. return true;
  1052. }
  1053. seenCNAMEs.insert(target->getTarget());
  1054. }
  1055. return false;
  1056. }),
  1057. ret.end());
  1058. bool seenA = false;
  1059. for (DNSRecord& rr : ret) {
  1060. if (rr.d_type == QType::A && rr.d_place == DNSResourceRecord::ANSWER) {
  1061. if (auto rec = getRR<ARecordContent>(rr)) {
  1062. ComboAddress ipv4(rec->getCA());
  1063. memcpy(&prefix.sin6.sin6_addr.s6_addr[12], &ipv4.sin4.sin_addr.s_addr, sizeof(ipv4.sin4.sin_addr.s_addr));
  1064. rr.d_content = std::make_shared<AAAARecordContent>(prefix);
  1065. rr.d_type = QType::AAAA;
  1066. }
  1067. seenA = true;
  1068. }
  1069. }
  1070. if (seenA) {
  1071. // We've seen an A in the ANSWER section, so there is no need to keep any
  1072. // SOA in the AUTHORITY section as this is not a NODATA response.
  1073. ret.erase(std::remove_if(
  1074. ret.begin(),
  1075. ret.end(),
  1076. [](DNSRecord& rr) {
  1077. return (rr.d_type == QType::SOA && rr.d_place == DNSResourceRecord::AUTHORITY);
  1078. }),
  1079. ret.end());
  1080. }
  1081. return rcode;
  1082. }
  1083. int getFakePTRRecords(const DNSName& qname, vector<DNSRecord>& ret)
  1084. {
  1085. /* qname has a reverse ordered IPv6 address, need to extract the underlying IPv4 address from it
  1086. and turn it into an IPv4 in-addr.arpa query */
  1087. ret.clear();
  1088. vector<string> parts = qname.getRawLabels();
  1089. if (parts.size() < 8) {
  1090. return -1;
  1091. }
  1092. string newquery;
  1093. for (int n = 0; n < 4; ++n) {
  1094. newquery +=
  1095. std::to_string(stoll(parts[n*2], 0, 16) + 16*stoll(parts[n*2+1], 0, 16));
  1096. newquery.append(1, '.');
  1097. }
  1098. newquery += "in-addr.arpa.";
  1099. DNSRecord rr;
  1100. rr.d_name = qname;
  1101. rr.d_type = QType::CNAME;
  1102. rr.d_content = std::make_shared<CNAMERecordContent>(newquery);
  1103. ret.push_back(rr);
  1104. int rcode = directResolve(DNSName(newquery), QType(QType::PTR), QClass::IN, ret);
  1105. return rcode;
  1106. }
  1107. static void startDoResolve(void *p)
  1108. {
  1109. auto dc=std::unique_ptr<DNSComboWriter>(reinterpret_cast<DNSComboWriter*>(p));
  1110. try {
  1111. if (t_queryring)
  1112. t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
  1113. uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
  1114. EDNSOpts edo;
  1115. std::vector<pair<uint16_t, string> > ednsOpts;
  1116. bool variableAnswer = dc->d_variable;
  1117. bool haveEDNS=false;
  1118. #ifdef NOD_ENABLED
  1119. bool hasUDR = false;
  1120. #endif /* NOD_ENABLED */
  1121. DNSPacketWriter::optvect_t returnedEdnsOptions; // Here we stuff all the options for the return packet
  1122. uint8_t ednsExtRCode = 0;
  1123. if(getEDNSOpts(dc->d_mdp, &edo)) {
  1124. haveEDNS=true;
  1125. if (edo.d_version != 0) {
  1126. ednsExtRCode = ERCode::BADVERS;
  1127. }
  1128. if(!dc->d_tcp) {
  1129. /* rfc6891 6.2.3:
  1130. "Values lower than 512 MUST be treated as equal to 512."
  1131. */
  1132. maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
  1133. }
  1134. ednsOpts = edo.d_options;
  1135. maxanswersize -= 11; // EDNS header size
  1136. for (const auto& o : edo.d_options) {
  1137. if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
  1138. dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
  1139. } else if (o.first == EDNSOptionCode::NSID) {
  1140. const static string mode_server_id = ::arg()["server-id"];
  1141. if(mode_server_id != "disabled" && !mode_server_id.empty() &&
  1142. maxanswersize > (2 + 2 + mode_server_id.size())) {
  1143. returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::NSID, mode_server_id));
  1144. variableAnswer = true; // Can't packetcache an answer with NSID
  1145. // Option Code and Option Length are both 2
  1146. maxanswersize -= 2 + 2 + mode_server_id.size();
  1147. }
  1148. }
  1149. }
  1150. }
  1151. /* perhaps there was no EDNS or no ECS but by now we looked */
  1152. dc->d_ecsParsed = true;
  1153. vector<DNSRecord> ret;
  1154. vector<uint8_t> packet;
  1155. auto luaconfsLocal = g_luaconfs.getLocal();
  1156. // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
  1157. bool wantsRPZ(true);
  1158. boost::optional<RecProtoBufMessage> pbMessage(boost::none);
  1159. #ifdef HAVE_PROTOBUF
  1160. if (checkProtobufExport(luaconfsLocal)) {
  1161. Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
  1162. ComboAddress requestor = requestorNM.getMaskedNetwork();
  1163. requestor.setPort(dc->d_source.getPort());
  1164. pbMessage = RecProtoBufMessage(RecProtoBufMessage::Response, dc->d_uuid, &requestor, &dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass, dc->d_mdp.d_header.id, dc->d_tcp, 0);
  1165. pbMessage->setServerIdentity(SyncRes::s_serverID);
  1166. pbMessage->setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIPv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
  1167. }
  1168. #endif /* HAVE_PROTOBUF */
  1169. #ifdef HAVE_FSTRM
  1170. checkFrameStreamExport(luaconfsLocal);
  1171. #endif
  1172. DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
  1173. pw.getHeader()->aa=0;
  1174. pw.getHeader()->ra=1;
  1175. pw.getHeader()->qr=1;
  1176. pw.getHeader()->tc=0;
  1177. pw.getHeader()->id=dc->d_mdp.d_header.id;
  1178. pw.getHeader()->rd=dc->d_mdp.d_header.rd;
  1179. pw.getHeader()->cd=dc->d_mdp.d_header.cd;
  1180. /* This is the lowest TTL seen in the records of the response,
  1181. so we can't cache it for longer than this value.
  1182. If we have a TTL cap, this value can't be larger than the
  1183. cap no matter what. */
  1184. uint32_t minTTL = dc->d_ttlCap;
  1185. SyncRes sr(dc->d_now);
  1186. sr.setId(MT->getTid());
  1187. bool DNSSECOK=false;
  1188. if(t_pdl) {
  1189. sr.setLuaEngine(t_pdl);
  1190. }
  1191. if(g_dnssecmode != DNSSECMode::Off) {
  1192. sr.setDoDNSSEC(true);
  1193. // Does the requestor want DNSSEC records?
  1194. if(edo.d_extFlags & EDNSOpts::DNSSECOK) {
  1195. DNSSECOK=true;
  1196. g_stats.dnssecQueries++;
  1197. }
  1198. if (dc->d_mdp.d_header.cd) {
  1199. /* Per rfc6840 section 5.9, "When processing a request with
  1200. the Checking Disabled (CD) bit set, a resolver SHOULD attempt
  1201. to return all response data, even data that has failed DNSSEC
  1202. validation. */
  1203. ++g_stats.dnssecCheckDisabledQueries;
  1204. }
  1205. if (dc->d_mdp.d_header.ad) {
  1206. /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
  1207. indicating that the requester understands and is interested in the
  1208. value of the AD bit in the response. This allows a requester to
  1209. indicate that it understands the AD bit without also requesting
  1210. DNSSEC data via the DO bit. */
  1211. ++g_stats.dnssecAuthenticDataQueries;
  1212. }
  1213. } else {
  1214. // Ignore the client-set CD flag
  1215. pw.getHeader()->cd=0;
  1216. }
  1217. sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
  1218. #ifdef HAVE_PROTOBUF
  1219. sr.setInitialRequestId(dc->d_uuid);
  1220. sr.setOutgoingProtobufServers(t_outgoingProtobufServers);
  1221. #endif
  1222. #ifdef HAVE_FSTRM
  1223. sr.setFrameStreamServers(t_frameStreamServers);
  1224. #endif
  1225. sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
  1226. sr.setQueryReceivedOverTCP(dc->d_tcp);
  1227. bool tracedQuery=false; // we could consider letting Lua know about this too
  1228. bool shouldNotValidate = false;
  1229. /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
  1230. int res = RCode::NoError;
  1231. DNSFilterEngine::Policy appliedPolicy;
  1232. RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ, dc->d_logResponse);
  1233. dq.ednsFlags = &edo.d_extFlags;
  1234. dq.ednsOptions = &ednsOpts;
  1235. dq.tag = dc->d_tag;
  1236. dq.discardedPolicies = &sr.d_discardedPolicies;
  1237. dq.policyTags = &dc->d_policyTags;
  1238. dq.appliedPolicy = &appliedPolicy;
  1239. dq.currentRecords = &ret;
  1240. dq.dh = &dc->d_mdp.d_header;
  1241. dq.data = dc->d_data;
  1242. #ifdef HAVE_PROTOBUF
  1243. dq.requestorId = dc->d_requestorId;
  1244. dq.deviceId = dc->d_deviceId;
  1245. dq.deviceName = dc->d_deviceName;
  1246. #endif
  1247. dq.proxyProtocolValues = &dc->d_proxyProtocolValues;
  1248. if(ednsExtRCode != 0) {
  1249. goto sendit;
  1250. }
  1251. if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
  1252. pw.getHeader()->tc = 1;
  1253. res = 0;
  1254. variableAnswer = true;
  1255. goto sendit;
  1256. }
  1257. if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
  1258. sr.setLogMode(SyncRes::Store);
  1259. tracedQuery=true;
  1260. }
  1261. if(!g_quiet || tracedQuery) {
  1262. g_log<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
  1263. <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
  1264. if(!dc->d_ednssubnet.source.empty()) {
  1265. g_log<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
  1266. }
  1267. g_log<<endl;
  1268. }
  1269. if(!dc->d_mdp.d_header.rd) {
  1270. sr.setCacheOnly();
  1271. }
  1272. if (t_pdl) {
  1273. t_pdl->prerpz(dq, res);
  1274. }
  1275. // Check if the client has a policy attached to it
  1276. if (wantsRPZ && !appliedPolicy.wasHit()) {
  1277. if (luaconfsLocal->dfe.getClientPolicy(dc->d_source, sr.d_discardedPolicies, appliedPolicy)) {
  1278. mergePolicyTags(dc->d_policyTags, appliedPolicy.getTags());
  1279. }
  1280. }
  1281. /* If we already have an answer generated from gettag_ffi, let's see if the filtering policies
  1282. should be applied to it */
  1283. if (dc->d_rcode != boost::none) {
  1284. bool policyOverride = false;
  1285. /* Unless we already matched on the client IP, time to check the qname.
  1286. We normally check it in beginResolve() but it will be bypassed since we already have an answer */
  1287. if (wantsRPZ && appliedPolicy.policyOverridesGettag()) {
  1288. if (appliedPolicy.d_type != DNSFilterEngine::PolicyType::None) {
  1289. // Client IP already matched
  1290. }
  1291. else {
  1292. // no match on the client IP, check the qname
  1293. if (luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, sr.d_discardedPolicies, appliedPolicy)) {
  1294. // got a match
  1295. mergePolicyTags(dc->d_policyTags, appliedPolicy.getTags());
  1296. }
  1297. }
  1298. if (appliedPolicy.wasHit()) {
  1299. policyOverride = true;
  1300. }
  1301. }
  1302. if (!policyOverride) {
  1303. /* No RPZ or gettag overrides it anyway */
  1304. ret = std::move(dc->d_records);
  1305. res = *dc->d_rcode;
  1306. if (res == RCode::NoError && dc->d_followCNAMERecords) {
  1307. res = followCNAMERecords(ret, QType(dc->d_mdp.d_qtype));
  1308. }
  1309. goto haveAnswer;
  1310. }
  1311. }
  1312. // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
  1313. if (!t_pdl || !t_pdl->preresolve(dq, res)) {
  1314. if (!g_dns64PrefixReverse.empty() && dq.qtype == QType::PTR && dq.qname.isPartOf(g_dns64PrefixReverse)) {
  1315. res = getFakePTRRecords(dq.qname, ret);
  1316. goto haveAnswer;
  1317. }
  1318. sr.setWantsRPZ(wantsRPZ);
  1319. if (wantsRPZ && appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) {
  1320. if (t_pdl && t_pdl->policyHitEventFilter(dc->d_remote, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_tcp, appliedPolicy, dc->d_policyTags, sr.d_discardedPolicies)) {
  1321. /* reset to no match */
  1322. appliedPolicy = DNSFilterEngine::Policy();
  1323. }
  1324. else {
  1325. auto policyResult = handlePolicyHit(appliedPolicy, dc, sr, res, ret, pw);
  1326. if (policyResult == PolicyResult::HaveAnswer) {
  1327. goto haveAnswer;
  1328. }
  1329. else if (policyResult == PolicyResult::Drop) {
  1330. return;
  1331. }
  1332. }
  1333. }
  1334. // Query did not get handled for Client IP or QNAME Policy reasons, now actually go out to find an answer
  1335. try {
  1336. sr.d_appliedPolicy = appliedPolicy;
  1337. sr.d_policyTags = std::move(dc->d_policyTags);
  1338. if (!dc->d_routingTag.empty()) {
  1339. sr.d_routingTag = dc->d_routingTag;
  1340. }
  1341. ret.clear(); // policy might have filled it with custom records but we decided not to use them
  1342. res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
  1343. shouldNotValidate = sr.wasOutOfBand();
  1344. }
  1345. catch (const ImmediateQueryDropException& e) {
  1346. // XXX We need to export a protobuf message (and do a NOD lookup) if requested!
  1347. g_stats.policyDrops++;
  1348. g_log<<Logger::Debug<<"Dropping query because of a filtering policy "<<makeLoginfo(dc)<<endl;
  1349. return;
  1350. }
  1351. catch (const ImmediateServFailException &e) {
  1352. if(g_logCommonErrors) {
  1353. g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
  1354. }
  1355. res = RCode::ServFail;
  1356. }
  1357. catch (const SendTruncatedAnswerException& e) {
  1358. ret.clear();
  1359. res = RCode::NoError;
  1360. pw.getHeader()->tc = 1;
  1361. }
  1362. catch (const PolicyHitException& e) {
  1363. res = -2;
  1364. }
  1365. dq.validationState = sr.getValidationState();
  1366. appliedPolicy = sr.d_appliedPolicy;
  1367. dc->d_policyTags = std::move(sr.d_policyTags);
  1368. // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
  1369. if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
  1370. if (appliedPolicy.d_kind == DNSFilterEngine::PolicyKind::NoAction) {
  1371. throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
  1372. }
  1373. auto policyResult = handlePolicyHit(appliedPolicy, dc, sr, res, ret, pw);
  1374. if (policyResult == PolicyResult::HaveAnswer) {
  1375. goto haveAnswer;
  1376. }
  1377. else if (policyResult == PolicyResult::Drop) {
  1378. return;
  1379. }
  1380. }
  1381. if (t_pdl || (g_dns64Prefix && dq.qtype == QType::AAAA && !vStateIsBogus(dq.validationState))) {
  1382. if (res == RCode::NoError) {
  1383. auto i = ret.cbegin();
  1384. for(; i!= ret.cend(); ++i) {
  1385. if (i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER) {
  1386. break;
  1387. }
  1388. }
  1389. if (i == ret.cend()) {
  1390. /* no record in the answer section, NODATA */
  1391. if (t_pdl && t_pdl->nodata(dq, res)) {
  1392. shouldNotValidate = true;
  1393. }
  1394. else if (g_dns64Prefix && dq.qtype == QType::AAAA && !vStateIsBogus(dq.validationState)) {
  1395. res = getFakeAAAARecords(dq.qname, *g_dns64Prefix, ret);
  1396. shouldNotValidate = true;
  1397. }
  1398. }
  1399. }
  1400. else if(res == RCode::NXDomain && t_pdl && t_pdl->nxdomain(dq, res)) {
  1401. shouldNotValidate = true;
  1402. }
  1403. if (t_pdl && t_pdl->postresolve(dq, res)) {
  1404. shouldNotValidate = true;
  1405. }
  1406. }
  1407. }
  1408. haveAnswer:;
  1409. if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
  1410. {
  1411. string trace(sr.getTrace());
  1412. if(!trace.empty()) {
  1413. vector<string> lines;
  1414. boost::split(lines, trace, boost::is_any_of("\n"));
  1415. for(const string& line : lines) {
  1416. if(!line.empty())
  1417. g_log<<Logger::Warning<< line << endl;
  1418. }
  1419. }
  1420. }
  1421. if(res == -1) {
  1422. pw.getHeader()->rcode=RCode::ServFail;
  1423. // no commit here, because no record
  1424. g_stats.servFails++;
  1425. }
  1426. else {
  1427. pw.getHeader()->rcode=res;
  1428. // Does the validation mode or query demand validation?
  1429. if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
  1430. try {
  1431. if(sr.doLog()) {
  1432. g_log<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<endl;
  1433. }
  1434. auto state = sr.getValidationState();
  1435. if(state == vState::Secure) {
  1436. if(sr.doLog()) {
  1437. g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates correctly"<<endl;
  1438. }
  1439. // Is the query source interested in the value of the ad-bit?
  1440. if (dc->d_mdp.d_header.ad || DNSSECOK)
  1441. pw.getHeader()->ad=1;
  1442. }
  1443. else if(state == vState::Insecure) {
  1444. if(sr.doLog()) {
  1445. g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Insecure"<<endl;
  1446. }
  1447. pw.getHeader()->ad=0;
  1448. }
  1449. else if (vStateIsBogus(state)) {
  1450. if(t_bogusremotes)
  1451. t_bogusremotes->push_back(dc->d_source);
  1452. if(t_bogusqueryring)
  1453. t_bogusqueryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
  1454. if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
  1455. g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Bogus"<<endl;
  1456. }
  1457. // Does the query or validation mode sending out a SERVFAIL on validation errors?
  1458. if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
  1459. if(sr.doLog()) {
  1460. g_log<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
  1461. }
  1462. pw.getHeader()->rcode=RCode::ServFail;
  1463. goto sendit;
  1464. } else {
  1465. if(sr.doLog()) {
  1466. g_log<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
  1467. }
  1468. }
  1469. }
  1470. }
  1471. catch(const ImmediateServFailException &e) {
  1472. if(g_logCommonErrors)
  1473. g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
  1474. pw.getHeader()->rcode=RCode::ServFail;
  1475. goto sendit;
  1476. }
  1477. }
  1478. if(ret.size()) {
  1479. pdns::orderAndShuffle(ret);
  1480. if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
  1481. stable_sort(ret.begin(), ret.end(), *sl);
  1482. variableAnswer=true;
  1483. }
  1484. }
  1485. bool needCommit = false;
  1486. for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
  1487. if( ! DNSSECOK &&
  1488. ( i->d_type == QType::NSEC3 ||
  1489. (
  1490. ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
  1491. (
  1492. ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
  1493. i->d_place != DNSResourceRecord::ANSWER
  1494. )
  1495. )
  1496. )
  1497. ) {
  1498. continue;
  1499. }
  1500. if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
  1501. needCommit = false;
  1502. break;
  1503. }
  1504. needCommit = true;
  1505. #ifdef NOD_ENABLED
  1506. bool udr = false;
  1507. if (g_udrEnabled) {
  1508. udr = udrCheckUniqueDNSRecord(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, *i);
  1509. if (!hasUDR && udr)
  1510. hasUDR = true;
  1511. }
  1512. #endif /* NOD ENABLED */
  1513. #ifdef HAVE_PROTOBUF
  1514. if (t_protobufServers) {
  1515. #ifdef NOD_ENABLED
  1516. pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes, udr);
  1517. #else
  1518. pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes);
  1519. #endif /* NOD_ENABLED */
  1520. }
  1521. #endif
  1522. }
  1523. if(needCommit)
  1524. pw.commit();
  1525. }
  1526. sendit:;
  1527. if(g_useIncomingECS && dc->d_ecsFound && !sr.wasVariable() && !variableAnswer) {
  1528. // cerr<<"Stuffing in a 0 scope because answer is static"<<endl;
  1529. EDNSSubnetOpts eo;
  1530. eo.source = dc->d_ednssubnet.source;
  1531. ComboAddress sa;
  1532. sa.reset();
  1533. sa.sin4.sin_family = eo.source.getNetwork().sin4.sin_family;
  1534. eo.scope = Netmask(sa, 0);
  1535. returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::ECS, makeEDNSSubnetOptsString(eo)));
  1536. }
  1537. if (haveEDNS) {
  1538. /* we try to add the EDNS OPT RR even for truncated answers,
  1539. as rfc6891 states:
  1540. "The minimal response MUST be the DNS header, question section, and an
  1541. OPT record. This MUST also occur when a truncated response (using
  1542. the DNS header's TC bit) is returned."
  1543. */
  1544. pw.addOpt(512, ednsExtRCode, DNSSECOK ? EDNSOpts::DNSSECOK : 0, returnedEdnsOptions);
  1545. pw.commit();
  1546. }
  1547. g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
  1548. updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
  1549. #ifdef NOD_ENABLED
  1550. bool nod = false;
  1551. if (g_nodEnabled) {
  1552. if (nodCheckNewDomain(dc->d_mdp.d_qname)) {
  1553. nod = true;
  1554. }
  1555. }
  1556. #endif /* NOD_ENABLED */
  1557. #ifdef HAVE_PROTOBUF
  1558. if (t_protobufServers && !(luaconfsLocal->protobufExportConfig.taggedOnly && appliedPolicy.getName().empty() && dc->d_policyTags.empty())) {
  1559. pbMessage->setBytes(packet.size());
  1560. pbMessage->setResponseCode(pw.getHeader()->rcode);
  1561. if (!appliedPolicy.getName().empty()) {
  1562. pbMessage->setAppliedPolicy(appliedPolicy.getName());
  1563. pbMessage->setAppliedPolicyType(appliedPolicy.d_type);
  1564. pbMessage->setAppliedPolicyTrigger(appliedPolicy.d_trigger);
  1565. pbMessage->setAppliedPolicyHit(appliedPolicy.d_hit);
  1566. }
  1567. pbMessage->setPolicyTags(dc->d_policyTags);
  1568. if (g_useKernelTimestamp && dc->d_kernelTimestamp.tv_sec) {
  1569. pbMessage->setQueryTime(dc->d_kernelTimestamp.tv_sec, dc->d_kernelTimestamp.tv_usec);
  1570. }
  1571. else {
  1572. pbMessage->setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
  1573. }
  1574. pbMessage->setRequestorId(dq.requestorId);
  1575. pbMessage->setDeviceId(dq.deviceId);
  1576. pbMessage->setDeviceName(dq.deviceName);
  1577. #ifdef NOD_ENABLED
  1578. if (g_nodEnabled) {
  1579. if (nod) {
  1580. pbMessage->setNOD(true);
  1581. pbMessage->addPolicyTag(g_nod_pbtag);
  1582. }
  1583. if (hasUDR) {
  1584. pbMessage->addPolicyTag(g_udr_pbtag);
  1585. }
  1586. }
  1587. #endif /* NOD_ENABLED */
  1588. if (dc->d_logResponse) {
  1589. protobufLogResponse(*pbMessage);
  1590. }
  1591. #ifdef NOD_ENABLED
  1592. if (g_nodEnabled) {
  1593. pbMessage->setNOD(false);
  1594. pbMessage->clearUDR();
  1595. if (nod)
  1596. pbMessage->removePolicyTag(g_nod_pbtag);
  1597. if (hasUDR)
  1598. pbMessage->removePolicyTag(g_udr_pbtag);
  1599. }
  1600. #endif /* NOD_ENABLED */
  1601. }
  1602. #endif
  1603. if(!dc->d_tcp) {
  1604. struct msghdr msgh;
  1605. struct iovec iov;
  1606. cmsgbuf_aligned cbuf;
  1607. fillMSGHdr(&msgh, &iov, &cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
  1608. msgh.msg_control=NULL;
  1609. if(g_fromtosockets.count(dc->d_socket)) {
  1610. addCMsgSrcAddr(&msgh, &cbuf, &dc->d_local, 0);
  1611. }
  1612. if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors) {
  1613. int err = errno;
  1614. g_log << Logger::Warning << "Sending UDP reply to client " << dc->getRemote() << " failed with: "
  1615. << strerror(err) << endl;
  1616. }
  1617. if(variableAnswer || sr.wasVariable()) {
  1618. g_stats.variableResponses++;
  1619. }
  1620. if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
  1621. t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, std::move(dc->d_query), dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
  1622. string((const char*)&*packet.begin(), packet.size()),
  1623. g_now.tv_sec,
  1624. pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
  1625. min(minTTL,SyncRes::s_packetcachettl),
  1626. dq.validationState,
  1627. dc->d_ecsBegin,
  1628. dc->d_ecsEnd,
  1629. std::move(pbMessage));
  1630. }
  1631. // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
  1632. }
  1633. else {
  1634. char buf[2];
  1635. buf[0]=packet.size()/256;
  1636. buf[1]=packet.size()%256;
  1637. Utility::iovec iov[2];
  1638. iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
  1639. iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
  1640. int wret=Utility::writev(dc->d_socket, iov, 2);
  1641. bool hadError=true;
  1642. if (wret == 0) {
  1643. g_log<<Logger::Warning<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
  1644. } else if (wret < 0 ) {
  1645. int err = errno;
  1646. g_log << Logger::Warning << "Error writing TCP answer to " << dc->getRemote() << ": " << strerror(err) << endl;
  1647. } else if ((unsigned int)wret != 2 + packet.size()) {
  1648. g_log<<Logger::Warning<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
  1649. } else {
  1650. hadError=false;
  1651. }
  1652. // update tcp connection status, closing if needed and doing the fd multiplexer accounting
  1653. if (dc->d_tcpConnection->d_requestsInFlight > 0) {
  1654. dc->d_tcpConnection->d_requestsInFlight--;
  1655. }
  1656. // In the code below, we try to remove the fd from the set, but
  1657. // we don't know if another mthread already did the remove, so we can get a
  1658. // "Tried to remove unlisted fd" exception. Not that an inflight < limit test
  1659. // will not work since we do not know if the other mthread got an error or not.
  1660. if(hadError) {
  1661. try {
  1662. t_fdm->removeReadFD(dc->d_socket);
  1663. }
  1664. catch (FDMultiplexerException &) {
  1665. }
  1666. dc->d_socket = -1;
  1667. }
  1668. else {
  1669. dc->d_tcpConnection->queriesCount++;
  1670. if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
  1671. try {
  1672. t_fdm->removeReadFD(dc->d_socket);
  1673. }
  1674. catch (FDMultiplexerException &) {
  1675. }
  1676. dc->d_socket = -1;
  1677. }
  1678. else {
  1679. Utility::gettimeofday(&g_now, 0); // needs to be updated
  1680. struct timeval ttd = g_now;
  1681. // If we cross from max to max-1 in flight requests, the fd was not listened to, add it back
  1682. if (dc->d_tcpConnection->d_requestsInFlight == TCPConnection::s_maxInFlight - 1) {
  1683. // A read error might have happened. If we add the fd back, it will most likely error again.
  1684. // This is not a big issue, the next handleTCPClientReadable() will see another read error
  1685. // and take action.
  1686. ttd.tv_sec += g_tcpTimeout;
  1687. t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection, &ttd);
  1688. } else {
  1689. // fd might have been removed by read error code, or a read timeout, so expect an exception
  1690. try {
  1691. t_fdm->setReadTTD(dc->d_socket, ttd, g_tcpTimeout);
  1692. }
  1693. catch (const FDMultiplexerException &) {
  1694. // but if the FD was removed because of a timeout while we were sending a response,
  1695. // we need to re-arm it. If it was an error it will error again.
  1696. ttd.tv_sec += g_tcpTimeout;
  1697. t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection, &ttd);
  1698. }
  1699. }
  1700. }
  1701. }
  1702. }
  1703. float spent=makeFloat(sr.getNow()-dc->d_now);
  1704. if (!g_quiet) {
  1705. g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
  1706. g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
  1707. sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
  1708. sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
  1709. if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
  1710. g_log<< ", dnssec="<<sr.getValidationState();
  1711. }
  1712. g_log<<endl;
  1713. }
  1714. if (sr.d_outqueries || sr.d_authzonequeries) {
  1715. s_RC->cacheMisses++;
  1716. }
  1717. else {
  1718. s_RC->cacheHits++;
  1719. }
  1720. if(spent < 0.001)
  1721. g_stats.answers0_1++;
  1722. else if(spent < 0.010)
  1723. g_stats.answers1_10++;
  1724. else if(spent < 0.1)
  1725. g_stats.answers10_100++;
  1726. else if(spent < 1.0)
  1727. g_stats.answers100_1000++;
  1728. else
  1729. g_stats.answersSlow++;
  1730. uint64_t newLat=(uint64_t)(spent*1000000);
  1731. newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
  1732. g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
  1733. // no worries, we do this for packet cache hits elsewhere
  1734. auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
  1735. if(ourtime < 1)
  1736. g_stats.ourtime0_1++;
  1737. else if(ourtime < 2)
  1738. g_stats.ourtime1_2++;
  1739. else if(ourtime < 4)
  1740. g_stats.ourtime2_4++;
  1741. else if(ourtime < 8)
  1742. g_stats.ourtime4_8++;
  1743. else if(ourtime < 16)
  1744. g_stats.ourtime8_16++;
  1745. else if(ourtime < 32)
  1746. g_stats.ourtime16_32++;
  1747. else {
  1748. // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
  1749. g_stats.ourtimeSlow++;
  1750. }
  1751. if(ourtime >= 0.0) {
  1752. newLat=ourtime*1000; // usec
  1753. g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
  1754. }
  1755. #ifdef NOD_ENABLED
  1756. if (nod) {
  1757. sendNODLookup(dc->d_mdp.d_qname);
  1758. }
  1759. #endif /* NOD_ENABLED */
  1760. // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
  1761. }
  1762. catch (const PDNSException &ae) {
  1763. g_log<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
  1764. }
  1765. catch (const MOADNSException &mde) {
  1766. g_log<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<mde.what()<<endl;
  1767. }
  1768. catch (const std::exception& e) {
  1769. g_log<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
  1770. // Luawrapper nests the exception from Lua, so we unnest it here
  1771. try {
  1772. std::rethrow_if_nested(e);
  1773. } catch(const std::exception& ne) {
  1774. g_log<<". Extra info: "<<ne.what();
  1775. } catch(...) {}
  1776. g_log<<endl;
  1777. }
  1778. catch(...) {
  1779. g_log<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
  1780. }
  1781. g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
  1782. }
  1783. static void makeControlChannelSocket(int processNum=-1)
  1784. {
  1785. string sockname=::arg()["socket-dir"]+"/"+s_programname;
  1786. if(processNum >= 0)
  1787. sockname += "."+std::to_string(processNum);
  1788. sockname+=".controlsocket";
  1789. s_rcc.listen(sockname);
  1790. int sockowner = -1;
  1791. int sockgroup = -1;
  1792. if (!::arg().isEmpty("socket-group"))
  1793. sockgroup=::arg().asGid("socket-group");
  1794. if (!::arg().isEmpty("socket-owner"))
  1795. sockowner=::arg().asUid("socket-owner");
  1796. if (sockgroup > -1 || sockowner > -1) {
  1797. if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
  1798. unixDie("Failed to chown control socket");
  1799. }
  1800. }
  1801. // do mode change if socket-mode is given
  1802. if(!::arg().isEmpty("socket-mode")) {
  1803. mode_t sockmode=::arg().asMode("socket-mode");
  1804. if(chmod(sockname.c_str(), sockmode) < 0) {
  1805. unixDie("Failed to chmod control socket");
  1806. }
  1807. }
  1808. }
  1809. static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
  1810. bool& foundECS, EDNSSubnetOpts* ednssubnet, EDNSOptionViewMap* options,
  1811. bool& foundXPF, ComboAddress* xpfSource, ComboAddress* xpfDest)
  1812. {
  1813. const bool lookForXPF = xpfSource != nullptr && g_xpfRRCode != 0;
  1814. const bool lookForECS = ednssubnet != nullptr;
  1815. const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(question.c_str());
  1816. size_t questionLen = question.length();
  1817. unsigned int consumed=0;
  1818. *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
  1819. size_t pos= sizeof(dnsheader)+consumed+4;
  1820. const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
  1821. const uint16_t arcount = ntohs(dh->arcount);
  1822. for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && ((lookForECS && !foundECS) || (lookForXPF && !foundXPF)); arpos++) {
  1823. if (question.at(pos) != 0) {
  1824. /* not an OPT or a XPF, bye. */
  1825. return;
  1826. }
  1827. pos += 1;
  1828. const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
  1829. pos += sizeof(dnsrecordheader);
  1830. if (pos >= questionLen) {
  1831. return;
  1832. }
  1833. /* OPT root label (1) followed by type (2) */
  1834. if(lookForECS && ntohs(drh->d_type) == QType::OPT) {
  1835. if (!options) {
  1836. char* ecsStart = nullptr;
  1837. size_t ecsLen = 0;
  1838. /* we need to pass the record len */
  1839. int res = getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen)))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStart, &ecsLen);
  1840. if (res == 0 && ecsLen > 4) {
  1841. EDNSSubnetOpts eso;
  1842. if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
  1843. *ednssubnet=eso;
  1844. foundECS = true;
  1845. }
  1846. }
  1847. }
  1848. else {
  1849. /* we need to pass the record len */
  1850. int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos -sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
  1851. if (res == 0) {
  1852. const auto& it = options->find(EDNSOptionCode::ECS);
  1853. if (it != options->end() && !it->second.values.empty() && it->second.values.at(0).content != nullptr && it->second.values.at(0).size > 0) {
  1854. EDNSSubnetOpts eso;
  1855. if(getEDNSSubnetOptsFromString(it->second.values.at(0).content, it->second.values.at(0).size, &eso)) {
  1856. *ednssubnet=eso;
  1857. foundECS = true;
  1858. }
  1859. }
  1860. }
  1861. }
  1862. }
  1863. else if (lookForXPF && ntohs(drh->d_type) == g_xpfRRCode && ntohs(drh->d_class) == QClass::IN && drh->d_ttl == 0) {
  1864. if ((questionLen - pos) < ntohs(drh->d_clen)) {
  1865. return;
  1866. }
  1867. foundXPF = parseXPFPayload(reinterpret_cast<const char*>(&question.at(pos)), ntohs(drh->d_clen), *xpfSource, xpfDest);
  1868. }
  1869. pos += ntohs(drh->d_clen);
  1870. }
  1871. }
  1872. static bool handleTCPReadResult(int fd, ssize_t bytes)
  1873. {
  1874. if (bytes == 0) {
  1875. /* EOF */
  1876. t_fdm->removeReadFD(fd);
  1877. return false;
  1878. }
  1879. else if (bytes < 0) {
  1880. if (errno != EAGAIN && errno != EWOULDBLOCK) {
  1881. t_fdm->removeReadFD(fd);
  1882. return false;
  1883. }
  1884. }
  1885. return true;
  1886. }
  1887. static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
  1888. {
  1889. shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
  1890. if (conn->state == TCPConnection::PROXYPROTOCOLHEADER) {
  1891. ssize_t bytes = recv(conn->getFD(), &conn->data.at(conn->proxyProtocolGot), conn->proxyProtocolNeed, 0);
  1892. if (bytes <= 0) {
  1893. handleTCPReadResult(fd, bytes);
  1894. return;
  1895. }
  1896. conn->proxyProtocolGot += bytes;
  1897. conn->data.resize(conn->proxyProtocolGot);
  1898. ssize_t remaining = isProxyHeaderComplete(conn->data);
  1899. if (remaining == 0) {
  1900. if (g_logCommonErrors) {
  1901. g_log<<Logger::Error<<"Unable to consume proxy protocol header in packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
  1902. }
  1903. ++g_stats.proxyProtocolInvalidCount;
  1904. t_fdm->removeReadFD(fd);
  1905. return;
  1906. }
  1907. else if (remaining < 0) {
  1908. conn->proxyProtocolNeed = -remaining;
  1909. conn->data.resize(conn->proxyProtocolGot + conn->proxyProtocolNeed);
  1910. return;
  1911. }
  1912. else {
  1913. /* proxy header received */
  1914. /* we ignore the TCP field for now, but we could properly set whether
  1915. the connection was received over UDP or TCP if needed */
  1916. bool tcp;
  1917. bool proxy = false;
  1918. size_t used = parseProxyHeader(conn->data, proxy, conn->d_source, conn->d_destination, tcp, conn->proxyProtocolValues);
  1919. if (used <= 0) {
  1920. if (g_logCommonErrors) {
  1921. g_log<<Logger::Error<<"Unable to parse proxy protocol header in packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
  1922. }
  1923. ++g_stats.proxyProtocolInvalidCount;
  1924. t_fdm->removeReadFD(fd);
  1925. return;
  1926. }
  1927. else if (static_cast<size_t>(used) > g_proxyProtocolMaximumSize) {
  1928. if (g_logCommonErrors) {
  1929. g_log<<Logger::Error<<"Proxy protocol header in packet from TCP client "<< conn->d_remote.toStringWithPort() << " is larger than proxy-protocol-maximum-size (" << used << "), dropping"<< endl;
  1930. }
  1931. ++g_stats.proxyProtocolInvalidCount;
  1932. t_fdm->removeReadFD(fd);
  1933. return;
  1934. }
  1935. /* Now that we have retrieved the address of the client, as advertised by the proxy
  1936. via the proxy protocol header, check that it is allowed by our ACL */
  1937. /* note that if the proxy header used a 'LOCAL' command, the original source and destination are untouched so everything should be fine */
  1938. if (t_allowFrom && !t_allowFrom->match(&conn->d_source)) {
  1939. if (!g_quiet) {
  1940. g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<conn->d_source.toString()<<", address not matched by allow-from"<<endl;
  1941. }
  1942. ++g_stats.unauthorizedTCP;
  1943. t_fdm->removeReadFD(fd);
  1944. return;
  1945. }
  1946. conn->data.resize(2);
  1947. conn->state = TCPConnection::BYTE0;
  1948. }
  1949. }
  1950. if (conn->state==TCPConnection::BYTE0) {
  1951. ssize_t bytes=recv(conn->getFD(), &conn->data[0], 2, 0);
  1952. if(bytes==1)
  1953. conn->state=TCPConnection::BYTE1;
  1954. if(bytes==2) {
  1955. conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
  1956. conn->data.resize(conn->qlen);
  1957. conn->bytesread=0;
  1958. conn->state=TCPConnection::GETQUESTION;
  1959. }
  1960. if (bytes <= 0) {
  1961. handleTCPReadResult(fd, bytes);
  1962. return;
  1963. }
  1964. }
  1965. if (conn->state==TCPConnection::BYTE1) {
  1966. ssize_t bytes=recv(conn->getFD(), &conn->data[1], 1, 0);
  1967. if(bytes==1) {
  1968. conn->state=TCPConnection::GETQUESTION;
  1969. conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
  1970. conn->data.resize(conn->qlen);
  1971. conn->bytesread=0;
  1972. }
  1973. if (bytes <= 0) {
  1974. if (!handleTCPReadResult(fd, bytes)) {
  1975. if(g_logCommonErrors) {
  1976. g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected after first byte"<<endl;
  1977. }
  1978. }
  1979. return;
  1980. }
  1981. }
  1982. if(conn->state==TCPConnection::GETQUESTION) {
  1983. ssize_t bytes=recv(conn->getFD(), &conn->data[conn->bytesread], conn->qlen - conn->bytesread, 0);
  1984. if (bytes <= 0) {
  1985. if (!handleTCPReadResult(fd, bytes)) {
  1986. if(g_logCommonErrors) {
  1987. g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected while reading question body"<<endl;
  1988. }
  1989. }
  1990. return;
  1991. }
  1992. else if (bytes > std::numeric_limits<std::uint16_t>::max()) {
  1993. if(g_logCommonErrors) {
  1994. g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" sent an invalid question size while reading question body"<<endl;
  1995. }
  1996. t_fdm->removeReadFD(fd);
  1997. return;
  1998. }
  1999. conn->bytesread+=(uint16_t)bytes;
  2000. if(conn->bytesread==conn->qlen) {
  2001. conn->state = TCPConnection::BYTE0;
  2002. std::unique_ptr<DNSComboWriter> dc;
  2003. try {
  2004. dc=std::unique_ptr<DNSComboWriter>(new DNSComboWriter(conn->data, g_now));
  2005. }
  2006. catch(const MOADNSException &mde) {
  2007. g_stats.clientParseError++;
  2008. if(g_logCommonErrors)
  2009. g_log<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
  2010. return;
  2011. }
  2012. dc->d_tcpConnection = conn; // carry the torch
  2013. dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
  2014. dc->d_tcp=true;
  2015. dc->setRemote(conn->d_remote);
  2016. dc->setSource(conn->d_source);
  2017. ComboAddress dest;
  2018. dest.reset();
  2019. dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
  2020. socklen_t len = dest.getSocklen();
  2021. getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
  2022. dc->setLocal(dest);
  2023. dc->setDestination(conn->d_destination);
  2024. /* we can't move this if we want to be able to access the values in
  2025. all queries sent over this connection */
  2026. dc->d_proxyProtocolValues = conn->proxyProtocolValues;
  2027. DNSName qname;
  2028. uint16_t qtype=0;
  2029. uint16_t qclass=0;
  2030. bool needECS = false;
  2031. bool needXPF = g_XPFAcl.match(conn->d_remote);
  2032. string requestorId;
  2033. string deviceId;
  2034. string deviceName;
  2035. bool logQuery = false;
  2036. #ifdef HAVE_PROTOBUF
  2037. auto luaconfsLocal = g_luaconfs.getLocal();
  2038. if (checkProtobufExport(luaconfsLocal)) {
  2039. needECS = true;
  2040. }
  2041. logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
  2042. dc->d_logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
  2043. #endif /* HAVE_PROTOBUF */
  2044. #ifdef HAVE_FSTRM
  2045. checkFrameStreamExport(luaconfsLocal);
  2046. #endif
  2047. if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
  2048. try {
  2049. EDNSOptionViewMap ednsOptions;
  2050. bool xpfFound = false;
  2051. dc->d_ecsParsed = true;
  2052. dc->d_ecsFound = false;
  2053. getQNameAndSubnet(conn->data, &qname, &qtype, &qclass,
  2054. dc->d_ecsFound, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
  2055. xpfFound, needXPF ? &dc->d_source : nullptr, needXPF ? &dc->d_destination : nullptr);
  2056. if(t_pdl) {
  2057. try {
  2058. if (t_pdl->d_gettag_ffi) {
  2059. dc->d_tag = t_pdl->gettag_ffi(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_records, dc->d_data, ednsOptions, true, dc->d_proxyProtocolValues, requestorId, deviceId, deviceName, dc->d_routingTag, dc->d_rcode, dc->d_ttlCap, dc->d_variable, logQuery, dc->d_logResponse, dc->d_followCNAMERecords);
  2060. }
  2061. else if (t_pdl->d_gettag) {
  2062. dc->d_tag = t_pdl->gettag(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, deviceName, dc->d_routingTag, dc->d_proxyProtocolValues);
  2063. }
  2064. }
  2065. catch(const std::exception& e) {
  2066. if(g_logCommonErrors)
  2067. g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
  2068. }
  2069. }
  2070. }
  2071. catch(const std::exception& e)
  2072. {
  2073. if(g_logCommonErrors)
  2074. g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
  2075. }
  2076. }
  2077. const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(&conn->data[0]);
  2078. #ifdef HAVE_PROTOBUF
  2079. if(t_protobufServers || t_outgoingProtobufServers) {
  2080. dc->d_requestorId = requestorId;
  2081. dc->d_deviceId = deviceId;
  2082. dc->d_deviceName = deviceName;
  2083. dc->d_uuid = getUniqueID();
  2084. }
  2085. if(t_protobufServers) {
  2086. try {
  2087. if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && dc->d_policyTags.empty())) {
  2088. protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dc->d_source, dc->d_destination, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId, dc->d_deviceName);
  2089. }
  2090. }
  2091. catch(std::exception& e) {
  2092. if(g_logCommonErrors)
  2093. g_log<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
  2094. }
  2095. }
  2096. #endif
  2097. if(t_pdl) {
  2098. if(t_pdl->ipfilter(dc->d_source, dc->d_destination, *dh)) {
  2099. if(!g_quiet)
  2100. g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED TCP question from "<<dc->d_source.toStringWithPort()<<(dc->d_source != dc->d_remote ? " (via "+dc->d_remote.toStringWithPort()+")" : "")<<" based on policy"<<endl;
  2101. g_stats.policyDrops++;
  2102. return;
  2103. }
  2104. }
  2105. if(dc->d_mdp.d_header.qr) {
  2106. g_stats.ignoredCount++;
  2107. if(g_logCommonErrors) {
  2108. g_log<<Logger::Error<<"Ignoring answer from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
  2109. }
  2110. return;
  2111. }
  2112. if(dc->d_mdp.d_header.opcode) {
  2113. g_stats.ignoredCount++;
  2114. if(g_logCommonErrors) {
  2115. g_log<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
  2116. }
  2117. return;
  2118. }
  2119. else if (dh->qdcount == 0) {
  2120. g_stats.emptyQueriesCount++;
  2121. if(g_logCommonErrors) {
  2122. g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<< dc->getRemote() <<" on server socket!"<<endl;
  2123. }
  2124. return;
  2125. }
  2126. else {
  2127. ++g_stats.qcounter;
  2128. ++g_stats.tcpqcounter;
  2129. ++conn->d_requestsInFlight;
  2130. if (conn->d_requestsInFlight >= TCPConnection::s_maxInFlight) {
  2131. t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
  2132. } else {
  2133. Utility::gettimeofday(&g_now, 0); // needed?
  2134. struct timeval ttd = g_now;
  2135. t_fdm->setReadTTD(fd, ttd, g_tcpTimeout);
  2136. }
  2137. MT->makeThread(startDoResolve, dc.release()); // deletes dc
  2138. return;
  2139. }
  2140. }
  2141. }
  2142. }
  2143. static bool expectProxyProtocol(const ComboAddress& from)
  2144. {
  2145. return g_proxyProtocolACL.match(from);
  2146. }
  2147. //! Handle new incoming TCP connection
  2148. static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
  2149. {
  2150. ComboAddress addr;
  2151. socklen_t addrlen=sizeof(addr);
  2152. int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
  2153. if(newsock>=0) {
  2154. if(MT->numProcesses() > g_maxMThreads) {
  2155. g_stats.overCapacityDrops++;
  2156. try {
  2157. closesocket(newsock);
  2158. }
  2159. catch(const PDNSException& e) {
  2160. g_log<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
  2161. }
  2162. return;
  2163. }
  2164. if(t_remotes) {
  2165. t_remotes->push_back(addr);
  2166. }
  2167. bool fromProxyProtocolSource = expectProxyProtocol(addr);
  2168. if(t_allowFrom && !t_allowFrom->match(&addr) && !fromProxyProtocolSource) {
  2169. if(!g_quiet)
  2170. g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address neither matched by allow-from nor proxy-protocol-from"<<endl;
  2171. g_stats.unauthorizedTCP++;
  2172. try {
  2173. closesocket(newsock);
  2174. }
  2175. catch(const PDNSException& e) {
  2176. g_log<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
  2177. }
  2178. return;
  2179. }
  2180. if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
  2181. g_stats.tcpClientOverflow++;
  2182. try {
  2183. closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
  2184. }
  2185. catch(const PDNSException& e) {
  2186. g_log<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
  2187. }
  2188. return;
  2189. }
  2190. setNonBlocking(newsock);
  2191. std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
  2192. tc->d_source = addr;
  2193. tc->d_destination.reset();
  2194. tc->d_destination.sin4.sin_family = addr.sin4.sin_family;
  2195. socklen_t len = tc->d_destination.getSocklen();
  2196. getsockname(tc->getFD(), reinterpret_cast<sockaddr*>(&tc->d_destination), &len); // if this fails, we're ok with it
  2197. if (fromProxyProtocolSource) {
  2198. tc->proxyProtocolNeed = s_proxyProtocolMinimumHeaderSize;
  2199. tc->data.resize(tc->proxyProtocolNeed);
  2200. tc->state = TCPConnection::PROXYPROTOCOLHEADER;
  2201. }
  2202. else {
  2203. tc->state = TCPConnection::BYTE0;
  2204. }
  2205. struct timeval ttd;
  2206. Utility::gettimeofday(&ttd, 0);
  2207. ttd.tv_sec += g_tcpTimeout;
  2208. t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc, &ttd);
  2209. }
  2210. }
  2211. static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, ComboAddress source, ComboAddress destination, struct timeval tv, int fd, std::vector<ProxyProtocolValue>& proxyProtocolValues)
  2212. {
  2213. gettimeofday(&g_now, 0);
  2214. if (tv.tv_sec) {
  2215. struct timeval diff = g_now - tv;
  2216. double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
  2217. if(delta > 1000.0) {
  2218. g_stats.tooOldDrops++;
  2219. return nullptr;
  2220. }
  2221. }
  2222. ++g_stats.qcounter;
  2223. if(fromaddr.sin4.sin_family==AF_INET6)
  2224. g_stats.ipv6qcounter++;
  2225. string response;
  2226. const struct dnsheader* dh = (struct dnsheader*)question.c_str();
  2227. unsigned int ctag=0;
  2228. uint32_t qhash = 0;
  2229. bool needECS = false;
  2230. bool needXPF = g_XPFAcl.match(fromaddr);
  2231. std::unordered_set<std::string> policyTags;
  2232. LuaContext::LuaObject data;
  2233. string requestorId;
  2234. string deviceId;
  2235. string deviceName;
  2236. string routingTag;
  2237. bool logQuery = false;
  2238. bool logResponse = false;
  2239. #ifdef HAVE_PROTOBUF
  2240. boost::uuids::uuid uniqueId;
  2241. auto luaconfsLocal = g_luaconfs.getLocal();
  2242. if (checkProtobufExport(luaconfsLocal)) {
  2243. uniqueId = getUniqueID();
  2244. needECS = true;
  2245. } else if (checkOutgoingProtobufExport(luaconfsLocal)) {
  2246. uniqueId = getUniqueID();
  2247. }
  2248. logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
  2249. logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
  2250. #endif
  2251. #ifdef HAVE_FSTRM
  2252. checkFrameStreamExport(luaconfsLocal);
  2253. #endif
  2254. EDNSSubnetOpts ednssubnet;
  2255. bool ecsFound = false;
  2256. bool ecsParsed = false;
  2257. uint16_t ecsBegin = 0;
  2258. uint16_t ecsEnd = 0;
  2259. std::vector<DNSRecord> records;
  2260. boost::optional<int> rcode = boost::none;
  2261. uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
  2262. bool variable = false;
  2263. bool followCNAMEs = false;
  2264. try {
  2265. DNSName qname;
  2266. uint16_t qtype=0;
  2267. uint16_t qclass=0;
  2268. uint32_t age;
  2269. bool qnameParsed=false;
  2270. #ifdef MALLOC_TRACE
  2271. /*
  2272. static uint64_t last=0;
  2273. if(!last)
  2274. g_mtracer->clearAllocators();
  2275. cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
  2276. last=g_mtracer->getAllocs();
  2277. cout<<g_mtracer->topAllocatorsString()<<endl;
  2278. g_mtracer->clearAllocators();
  2279. */
  2280. #endif
  2281. if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
  2282. try {
  2283. EDNSOptionViewMap ednsOptions;
  2284. bool xpfFound = false;
  2285. ecsFound = false;
  2286. getQNameAndSubnet(question, &qname, &qtype, &qclass,
  2287. ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
  2288. xpfFound, needXPF ? &source : nullptr, needXPF ? &destination : nullptr);
  2289. qnameParsed = true;
  2290. ecsParsed = true;
  2291. if(t_pdl) {
  2292. try {
  2293. if (t_pdl->d_gettag_ffi) {
  2294. ctag = t_pdl->gettag_ffi(source, ednssubnet.source, destination, qname, qtype, &policyTags, records, data, ednsOptions, false, proxyProtocolValues, requestorId, deviceId, deviceName, routingTag, rcode, ttlCap, variable, logQuery, logResponse, followCNAMEs);
  2295. }
  2296. else if (t_pdl->d_gettag) {
  2297. ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, deviceName, routingTag, proxyProtocolValues);
  2298. }
  2299. }
  2300. catch(const std::exception& e) {
  2301. if(g_logCommonErrors)
  2302. g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
  2303. }
  2304. }
  2305. }
  2306. catch(const std::exception& e)
  2307. {
  2308. if(g_logCommonErrors)
  2309. g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
  2310. }
  2311. }
  2312. bool cacheHit = false;
  2313. boost::optional<RecProtoBufMessage> pbMessage(boost::none);
  2314. #ifdef HAVE_PROTOBUF
  2315. if (t_protobufServers) {
  2316. pbMessage = RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
  2317. pbMessage->setServerIdentity(SyncRes::s_serverID);
  2318. if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && policyTags.empty())) {
  2319. protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, source, destination, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId, deviceName);
  2320. }
  2321. }
  2322. #endif /* HAVE_PROTOBUF */
  2323. /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
  2324. but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
  2325. as cacheable we would cache it with a wrong tag, so better safe than sorry. */
  2326. vState valState;
  2327. if (qnameParsed) {
  2328. cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
  2329. }
  2330. else {
  2331. cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, &qtype, &qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
  2332. }
  2333. if (cacheHit) {
  2334. if (vStateIsBogus(valState)) {
  2335. if(t_bogusremotes)
  2336. t_bogusremotes->push_back(source);
  2337. if(t_bogusqueryring)
  2338. t_bogusqueryring->push_back(make_pair(qname, qtype));
  2339. }
  2340. #ifdef HAVE_PROTOBUF
  2341. if(t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbMessage->getAppliedPolicy().empty() && pbMessage->getPolicyTags().empty())) {
  2342. Netmask requestorNM(source, source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
  2343. ComboAddress requestor = requestorNM.getMaskedNetwork();
  2344. requestor.setPort(source.getPort());
  2345. pbMessage->update(uniqueId, &requestor, &destination, false, dh->id);
  2346. pbMessage->setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIPv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
  2347. if (g_useKernelTimestamp && tv.tv_sec) {
  2348. pbMessage->setQueryTime(tv.tv_sec, tv.tv_usec);
  2349. }
  2350. else {
  2351. pbMessage->setQueryTime(g_now.tv_sec, g_now.tv_usec);
  2352. }
  2353. pbMessage->setRequestorId(requestorId);
  2354. pbMessage->setDeviceId(deviceId);
  2355. pbMessage->setDeviceName(deviceName);
  2356. protobufLogResponse(*pbMessage);
  2357. }
  2358. #endif /* HAVE_PROTOBUF */
  2359. if(!g_quiet)
  2360. g_log<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<endl;
  2361. g_stats.packetCacheHits++;
  2362. SyncRes::s_queries++;
  2363. ageDNSPacket(response, age);
  2364. struct msghdr msgh;
  2365. struct iovec iov;
  2366. cmsgbuf_aligned cbuf;
  2367. fillMSGHdr(&msgh, &iov, &cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
  2368. msgh.msg_control=NULL;
  2369. if(g_fromtosockets.count(fd)) {
  2370. addCMsgSrcAddr(&msgh, &cbuf, &destaddr, 0);
  2371. }
  2372. if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors) {
  2373. int err = errno;
  2374. g_log << Logger::Warning << "Sending UDP reply to client " << source.toStringWithPort()
  2375. << (source != fromaddr ? " (via " + fromaddr.toStringWithPort() + ")" : "") << " failed with: "
  2376. << strerror(err) << endl;
  2377. }
  2378. if(response.length() >= sizeof(struct dnsheader)) {
  2379. struct dnsheader tmpdh;
  2380. memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
  2381. updateResponseStats(tmpdh.rcode, source, response.length(), 0, 0);
  2382. }
  2383. g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
  2384. g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
  2385. return 0;
  2386. }
  2387. }
  2388. catch(std::exception& e) {
  2389. if(g_logCommonErrors)
  2390. g_log<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
  2391. return 0;
  2392. }
  2393. if(t_pdl) {
  2394. if(t_pdl->ipfilter(source, destination, *dh)) {
  2395. if(!g_quiet)
  2396. g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" based on policy"<<endl;
  2397. g_stats.policyDrops++;
  2398. return 0;
  2399. }
  2400. }
  2401. if(MT->numProcesses() > g_maxMThreads) {
  2402. if(!g_quiet)
  2403. g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<", over capacity"<<endl;
  2404. g_stats.overCapacityDrops++;
  2405. return 0;
  2406. }
  2407. auto dc = std::unique_ptr<DNSComboWriter>(new DNSComboWriter(question, g_now, std::move(policyTags), std::move(data), std::move(records)));
  2408. dc->setSocket(fd);
  2409. dc->d_tag=ctag;
  2410. dc->d_qhash=qhash;
  2411. dc->setRemote(fromaddr);
  2412. dc->setSource(source);
  2413. dc->setLocal(destaddr);
  2414. dc->setDestination(destination);
  2415. dc->d_tcp=false;
  2416. dc->d_ecsFound = ecsFound;
  2417. dc->d_ecsParsed = ecsParsed;
  2418. dc->d_ecsBegin = ecsBegin;
  2419. dc->d_ecsEnd = ecsEnd;
  2420. dc->d_ednssubnet = ednssubnet;
  2421. dc->d_ttlCap = ttlCap;
  2422. dc->d_variable = variable;
  2423. dc->d_followCNAMERecords = followCNAMEs;
  2424. dc->d_rcode = rcode;
  2425. dc->d_logResponse = logResponse;
  2426. #ifdef HAVE_PROTOBUF
  2427. if (t_protobufServers || t_outgoingProtobufServers) {
  2428. dc->d_uuid = std::move(uniqueId);
  2429. }
  2430. dc->d_requestorId = requestorId;
  2431. dc->d_deviceId = deviceId;
  2432. dc->d_deviceName = deviceName;
  2433. dc->d_kernelTimestamp = tv;
  2434. #endif
  2435. dc->d_proxyProtocolValues = std::move(proxyProtocolValues);
  2436. dc->d_routingTag = std::move(routingTag);
  2437. MT->makeThread(startDoResolve, (void*) dc.release()); // deletes dc
  2438. return 0;
  2439. }
  2440. static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
  2441. {
  2442. ssize_t len;
  2443. static const size_t maxIncomingQuerySize = g_proxyProtocolACL.empty() ? 512 : (512 + g_proxyProtocolMaximumSize);
  2444. static thread_local std::string data;
  2445. ComboAddress fromaddr;
  2446. ComboAddress source;
  2447. ComboAddress destination;
  2448. struct msghdr msgh;
  2449. struct iovec iov;
  2450. cmsgbuf_aligned cbuf;
  2451. bool firstQuery = true;
  2452. std::vector<ProxyProtocolValue> proxyProtocolValues;
  2453. for(size_t queriesCounter = 0; queriesCounter < s_maxUDPQueriesPerRound; queriesCounter++) {
  2454. bool proxyProto = false;
  2455. data.resize(maxIncomingQuerySize);
  2456. fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
  2457. fillMSGHdr(&msgh, &iov, &cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
  2458. if((len=recvmsg(fd, &msgh, 0)) >= 0) {
  2459. firstQuery = false;
  2460. if (msgh.msg_flags & MSG_TRUNC) {
  2461. g_stats.truncatedDrops++;
  2462. if (!g_quiet) {
  2463. g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toString()<<endl;
  2464. }
  2465. return;
  2466. }
  2467. data.resize(static_cast<size_t>(len));
  2468. if (expectProxyProtocol(fromaddr)) {
  2469. bool tcp;
  2470. ssize_t used = parseProxyHeader(data, proxyProto, source, destination, tcp, proxyProtocolValues);
  2471. if (used <= 0) {
  2472. ++g_stats.proxyProtocolInvalidCount;
  2473. if (!g_quiet) {
  2474. g_log<<Logger::Error<<"Ignoring invalid proxy protocol ("<<std::to_string(len)<<", "<<std::to_string(used)<<") query from "<<fromaddr.toStringWithPort()<<endl;
  2475. }
  2476. return;
  2477. }
  2478. else if (static_cast<size_t>(used) > g_proxyProtocolMaximumSize) {
  2479. if (g_quiet) {
  2480. g_log<<Logger::Error<<"Proxy protocol header in UDP packet from "<< fromaddr.toStringWithPort() << " is larger than proxy-protocol-maximum-size (" << used << "), dropping"<< endl;
  2481. }
  2482. ++g_stats.proxyProtocolInvalidCount;
  2483. return;
  2484. }
  2485. data.erase(0, used);
  2486. }
  2487. else if (len > 512) {
  2488. /* we only allow UDP packets larger than 512 for those with a proxy protocol header */
  2489. g_stats.truncatedDrops++;
  2490. if (!g_quiet) {
  2491. g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toStringWithPort()<<endl;
  2492. }
  2493. return;
  2494. }
  2495. if (data.size() < sizeof(dnsheader)) {
  2496. g_stats.ignoredCount++;
  2497. if (!g_quiet) {
  2498. g_log<<Logger::Error<<"Ignoring too-short ("<<std::to_string(data.size())<<") query from "<<fromaddr.toString()<<endl;
  2499. }
  2500. return;
  2501. }
  2502. if (!proxyProto) {
  2503. source = fromaddr;
  2504. }
  2505. if(t_remotes) {
  2506. t_remotes->push_back(fromaddr);
  2507. }
  2508. if(t_allowFrom && !t_allowFrom->match(&source)) {
  2509. if(!g_quiet) {
  2510. g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<source.toString()<<", address not matched by allow-from"<<endl;
  2511. }
  2512. g_stats.unauthorizedUDP++;
  2513. return;
  2514. }
  2515. BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
  2516. if(!fromaddr.sin4.sin_port) { // also works for IPv6
  2517. if(!g_quiet) {
  2518. g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
  2519. }
  2520. g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
  2521. return;
  2522. }
  2523. try {
  2524. dnsheader* dh=(dnsheader*)&data[0];
  2525. if(dh->qr) {
  2526. g_stats.ignoredCount++;
  2527. if(g_logCommonErrors) {
  2528. g_log<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
  2529. }
  2530. }
  2531. else if(dh->opcode) {
  2532. g_stats.ignoredCount++;
  2533. if(g_logCommonErrors) {
  2534. g_log<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
  2535. }
  2536. }
  2537. else if (dh->qdcount == 0) {
  2538. g_stats.emptyQueriesCount++;
  2539. if(g_logCommonErrors) {
  2540. g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<<fromaddr.toString()<<" on server socket!"<<endl;
  2541. }
  2542. }
  2543. else {
  2544. struct timeval tv={0,0};
  2545. HarvestTimestamp(&msgh, &tv);
  2546. ComboAddress dest;
  2547. dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
  2548. auto loc = rplookup(g_listenSocketsAddresses, fd);
  2549. if(HarvestDestinationAddress(&msgh, &dest)) {
  2550. // but.. need to get port too
  2551. if(loc) {
  2552. dest.sin4.sin_port = loc->sin4.sin_port;
  2553. }
  2554. }
  2555. else {
  2556. if(loc) {
  2557. dest = *loc;
  2558. }
  2559. else {
  2560. dest.sin4.sin_family = fromaddr.sin4.sin_family;
  2561. socklen_t slen = dest.getSocklen();
  2562. getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
  2563. }
  2564. }
  2565. if (!proxyProto) {
  2566. destination = dest;
  2567. }
  2568. if(g_weDistributeQueries) {
  2569. std::string localdata = data;
  2570. distributeAsyncFunction(data, [localdata, fromaddr, dest, source, destination, tv, fd, proxyProtocolValues]() mutable
  2571. { return doProcessUDPQuestion(localdata, fromaddr, dest, source, destination, tv, fd, proxyProtocolValues); });
  2572. }
  2573. else {
  2574. ++s_threadInfos[t_id].numberOfDistributedQueries;
  2575. doProcessUDPQuestion(data, fromaddr, dest, source, destination, tv, fd, proxyProtocolValues);
  2576. }
  2577. }
  2578. }
  2579. catch(const MOADNSException &mde) {
  2580. g_stats.clientParseError++;
  2581. if(g_logCommonErrors) {
  2582. g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
  2583. }
  2584. }
  2585. catch(const std::runtime_error& e) {
  2586. g_stats.clientParseError++;
  2587. if(g_logCommonErrors) {
  2588. g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
  2589. }
  2590. }
  2591. }
  2592. else {
  2593. // cerr<<t_id<<" had error: "<<stringerror()<<endl;
  2594. if(firstQuery && errno == EAGAIN) {
  2595. g_stats.noPacketError++;
  2596. }
  2597. break;
  2598. }
  2599. }
  2600. }
  2601. static void makeTCPServerSockets(deferredAdd_t& deferredAdds, std::set<int>& tcpSockets)
  2602. {
  2603. int fd;
  2604. vector<string>locals;
  2605. stringtok(locals,::arg()["local-address"]," ,");
  2606. if(locals.empty())
  2607. throw PDNSException("No local address specified");
  2608. for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
  2609. ServiceTuple st;
  2610. st.port=::arg().asNum("local-port");
  2611. parseService(*i, st);
  2612. ComboAddress sin;
  2613. sin.reset();
  2614. sin.sin4.sin_family = AF_INET;
  2615. if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
  2616. sin.sin6.sin6_family = AF_INET6;
  2617. if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
  2618. throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
  2619. }
  2620. fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
  2621. if(fd<0)
  2622. throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
  2623. setCloseOnExec(fd);
  2624. int tmp=1;
  2625. if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
  2626. g_log<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
  2627. exit(1);
  2628. }
  2629. if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
  2630. int err = errno;
  2631. g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(err)<<endl;
  2632. }
  2633. #ifdef TCP_DEFER_ACCEPT
  2634. if(setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
  2635. if(i==locals.begin())
  2636. g_log<<Logger::Info<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
  2637. }
  2638. #endif
  2639. if( ::arg().mustDo("non-local-bind") )
  2640. Utility::setBindAny(AF_INET, fd);
  2641. if (g_reusePort) {
  2642. #if defined(SO_REUSEPORT_LB)
  2643. try {
  2644. SSetsockopt(fd, SOL_SOCKET, SO_REUSEPORT_LB, 1);
  2645. }
  2646. catch (const std::exception& e) {
  2647. throw PDNSException(std::string("SO_REUSEPORT_LB: ") + e.what());
  2648. }
  2649. #elif defined(SO_REUSEPORT)
  2650. try {
  2651. SSetsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1);
  2652. }
  2653. catch (const std::exception& e) {
  2654. throw PDNSException(std::string("SO_REUSEPORT: ") + e.what());
  2655. }
  2656. #endif
  2657. }
  2658. if (::arg().asNum("tcp-fast-open") > 0) {
  2659. #ifdef TCP_FASTOPEN
  2660. int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
  2661. if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
  2662. int err = errno;
  2663. g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(err)<<endl;
  2664. }
  2665. #else
  2666. g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
  2667. #endif
  2668. }
  2669. sin.sin4.sin_port = htons(st.port);
  2670. socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
  2671. if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
  2672. throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
  2673. setNonBlocking(fd);
  2674. setSocketSendBuffer(fd, 65000);
  2675. listen(fd, 128);
  2676. deferredAdds.push_back(make_pair(fd, handleNewTCPQuestion));
  2677. tcpSockets.insert(fd);
  2678. // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
  2679. // - fd is not that which we know here, but returned from accept()
  2680. if(sin.sin4.sin_family == AF_INET)
  2681. g_log<<Logger::Info<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
  2682. else
  2683. g_log<<Logger::Info<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
  2684. }
  2685. }
  2686. static void makeUDPServerSockets(deferredAdd_t& deferredAdds)
  2687. {
  2688. int one=1;
  2689. vector<string>locals;
  2690. stringtok(locals,::arg()["local-address"]," ,");
  2691. if(locals.empty())
  2692. throw PDNSException("No local address specified");
  2693. for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
  2694. ServiceTuple st;
  2695. st.port=::arg().asNum("local-port");
  2696. parseService(*i, st);
  2697. ComboAddress sin;
  2698. sin.reset();
  2699. sin.sin4.sin_family = AF_INET;
  2700. if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
  2701. sin.sin6.sin6_family = AF_INET6;
  2702. if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
  2703. throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
  2704. }
  2705. int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
  2706. if(fd < 0) {
  2707. throw PDNSException("Making a UDP server socket for resolver: "+stringerror());
  2708. }
  2709. if (!setSocketTimestamps(fd))
  2710. g_log<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
  2711. if(IsAnyAddress(sin)) {
  2712. if(sin.sin4.sin_family == AF_INET)
  2713. if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
  2714. g_fromtosockets.insert(fd);
  2715. #ifdef IPV6_RECVPKTINFO
  2716. if(sin.sin4.sin_family == AF_INET6)
  2717. if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
  2718. g_fromtosockets.insert(fd);
  2719. #endif
  2720. if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
  2721. int err = errno;
  2722. g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(err)<<endl;
  2723. }
  2724. }
  2725. if( ::arg().mustDo("non-local-bind") )
  2726. Utility::setBindAny(AF_INET6, fd);
  2727. setCloseOnExec(fd);
  2728. setSocketReceiveBuffer(fd, 250000);
  2729. sin.sin4.sin_port = htons(st.port);
  2730. if (g_reusePort) {
  2731. #if defined(SO_REUSEPORT_LB)
  2732. try {
  2733. SSetsockopt(fd, SOL_SOCKET, SO_REUSEPORT_LB, 1);
  2734. }
  2735. catch (const std::exception& e) {
  2736. throw PDNSException(std::string("SO_REUSEPORT_LB: ") + e.what());
  2737. }
  2738. #elif defined(SO_REUSEPORT)
  2739. try {
  2740. SSetsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1);
  2741. }
  2742. catch (const std::exception& e) {
  2743. throw PDNSException(std::string("SO_REUSEPORT: ") + e.what());
  2744. }
  2745. #endif
  2746. }
  2747. if (sin.isIPv4()) {
  2748. try {
  2749. setSocketIgnorePMTU(fd);
  2750. }
  2751. catch(const std::exception& e) {
  2752. g_log<<Logger::Warning<<"Failed to set IP_MTU_DISCOVER on UDP server socket: "<<e.what()<<endl;
  2753. }
  2754. }
  2755. socklen_t socklen=sin.getSocklen();
  2756. if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
  2757. throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
  2758. setNonBlocking(fd);
  2759. deferredAdds.push_back(make_pair(fd, handleNewUDPQuestion));
  2760. g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
  2761. if(sin.sin4.sin_family == AF_INET)
  2762. g_log<<Logger::Info<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
  2763. else
  2764. g_log<<Logger::Info<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
  2765. }
  2766. }
  2767. static void daemonize(void)
  2768. {
  2769. if(fork())
  2770. exit(0); // bye bye
  2771. setsid();
  2772. int i=open("/dev/null",O_RDWR); /* open stdin */
  2773. if(i < 0)
  2774. g_log<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
  2775. else {
  2776. dup2(i,0); /* stdin */
  2777. dup2(i,1); /* stderr */
  2778. dup2(i,2); /* stderr */
  2779. close(i);
  2780. }
  2781. }
  2782. static void termIntHandler(int)
  2783. {
  2784. doExit();
  2785. }
  2786. static void usr1Handler(int)
  2787. {
  2788. statsWanted=true;
  2789. }
  2790. static void usr2Handler(int)
  2791. {
  2792. g_quiet= !g_quiet;
  2793. SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
  2794. ::arg().set("quiet")=g_quiet ? "" : "no";
  2795. }
  2796. static void doStats(void)
  2797. {
  2798. static time_t lastOutputTime;
  2799. static uint64_t lastQueryCount;
  2800. uint64_t cacheHits = s_RC->cacheHits;
  2801. uint64_t cacheMisses = s_RC->cacheMisses;
  2802. uint64_t cacheSize = s_RC->size();
  2803. auto rc_stats = s_RC->stats();
  2804. double r = rc_stats.second == 0 ? 0.0 : (100.0 * rc_stats.first / rc_stats.second);
  2805. if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
  2806. g_log<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
  2807. cacheSize << " cache entries, "<<
  2808. broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
  2809. (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
  2810. g_log << Logger::Notice<< "stats: cache contended/acquired " << rc_stats.first << '/' << rc_stats.second << " = " << r << '%' << endl;
  2811. g_log<<Logger::Notice<<"stats: throttle map: "
  2812. << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
  2813. << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<", failed ns: "
  2814. << broadcastAccFunction<uint64_t>(pleaseGetFailedServersSize)<<", ednsmap: "
  2815. <<broadcastAccFunction<uint64_t>(pleaseGetEDNSStatusesSize)<<endl;
  2816. g_log<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
  2817. g_log<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
  2818. <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
  2819. g_log<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
  2820. broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
  2821. //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
  2822. //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
  2823. g_log<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
  2824. " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
  2825. size_t idx = 0;
  2826. for (const auto& threadInfo : s_threadInfos) {
  2827. if(threadInfo.isWorker) {
  2828. g_log<<Logger::Notice<<"stats: thread "<<idx<<" has been distributed "<<threadInfo.numberOfDistributedQueries<<" queries"<<endl;
  2829. ++idx;
  2830. }
  2831. }
  2832. time_t now = time(0);
  2833. if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
  2834. g_log<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
  2835. }
  2836. lastOutputTime = now;
  2837. lastQueryCount = SyncRes::s_queries;
  2838. }
  2839. else if(statsWanted)
  2840. g_log<<Logger::Notice<<"stats: no stats yet!"<<endl;
  2841. statsWanted=false;
  2842. }
  2843. static void houseKeeping(void *)
  2844. {
  2845. static thread_local time_t last_rootupdate, last_secpoll, last_trustAnchorUpdate{0}, last_RC_prune;
  2846. static thread_local struct timeval last_prune;
  2847. static thread_local int cleanCounter=0;
  2848. static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
  2849. auto luaconfsLocal = g_luaconfs.getLocal();
  2850. if (last_trustAnchorUpdate == 0 && !luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0) {
  2851. // Loading the Lua config file already "refreshed" the TAs
  2852. last_trustAnchorUpdate = g_now.tv_sec + luaconfsLocal->trustAnchorFileInfo.interval * 3600;
  2853. }
  2854. try {
  2855. if(s_running) {
  2856. return;
  2857. }
  2858. s_running=true;
  2859. struct timeval now, past;
  2860. Utility::gettimeofday(&now, nullptr);
  2861. past = now;
  2862. past.tv_sec -= 5;
  2863. if (last_prune < past) {
  2864. t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
  2865. SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
  2866. time_t limit;
  2867. if(!((cleanCounter++)%40)) { // this is a full scan!
  2868. limit=now.tv_sec-300;
  2869. SyncRes::pruneNSSpeeds(limit);
  2870. }
  2871. limit = now.tv_sec - SyncRes::s_serverdownthrottletime * 10;
  2872. SyncRes::pruneFailedServers(limit);
  2873. limit = now.tv_sec - 2*3600;
  2874. SyncRes::pruneEDNSStatuses(limit);
  2875. SyncRes::pruneThrottledServers();
  2876. Utility::gettimeofday(&last_prune, nullptr);
  2877. }
  2878. if(isHandlerThread()) {
  2879. if (now.tv_sec - last_RC_prune > 5) {
  2880. s_RC->doPrune(g_maxCacheEntries);
  2881. last_RC_prune = now.tv_sec;
  2882. }
  2883. // XXX !!! global
  2884. if (now.tv_sec - last_rootupdate > 7200) {
  2885. int res = SyncRes::getRootNS(g_now, nullptr, 0);
  2886. if (!res) {
  2887. last_rootupdate=now.tv_sec;
  2888. try {
  2889. primeRootNSZones(g_dnssecmode != DNSSECMode::Off, 0);
  2890. }
  2891. catch (const std::exception& e) {
  2892. g_log<<Logger::Error<<"Exception while priming the root NS zones: "<<e.what()<<endl;
  2893. }
  2894. catch (const PDNSException& e) {
  2895. g_log<<Logger::Error<<"Exception while priming the root NS zones: "<<e.reason<<endl;
  2896. }
  2897. catch (const ImmediateServFailException& e) {
  2898. g_log<<Logger::Error<<"Exception while priming the root NS zones: "<<e.reason<<endl;
  2899. }
  2900. catch (const PolicyHitException& e) {
  2901. g_log<<Logger::Error<<"Policy hit while priming the root NS zones"<<endl;
  2902. }
  2903. catch (...)
  2904. {
  2905. g_log<<Logger::Error<<"Exception while priming the root NS zones"<<endl;
  2906. }
  2907. }
  2908. }
  2909. if(now.tv_sec - last_secpoll >= 3600) {
  2910. try {
  2911. doSecPoll(&last_secpoll);
  2912. }
  2913. catch (const std::exception& e)
  2914. {
  2915. g_log<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
  2916. }
  2917. catch (const PDNSException& e)
  2918. {
  2919. g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
  2920. }
  2921. catch (const ImmediateServFailException &e)
  2922. {
  2923. g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
  2924. }
  2925. catch (const PolicyHitException& e) {
  2926. g_log<<Logger::Error<<"Policy hit while performing security poll"<<endl;
  2927. }
  2928. catch (...)
  2929. {
  2930. g_log<<Logger::Error<<"Exception while performing security poll"<<endl;
  2931. }
  2932. }
  2933. if (!luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0 &&
  2934. g_now.tv_sec - last_trustAnchorUpdate >= (luaconfsLocal->trustAnchorFileInfo.interval * 3600)) {
  2935. g_log<<Logger::Debug<<"Refreshing Trust Anchors from file"<<endl;
  2936. try {
  2937. map<DNSName, dsmap_t> dsAnchors;
  2938. if (updateTrustAnchorsFromFile(luaconfsLocal->trustAnchorFileInfo.fname, dsAnchors)) {
  2939. g_luaconfs.modify([&dsAnchors](LuaConfigItems& lci) {
  2940. lci.dsAnchors = dsAnchors;
  2941. });
  2942. }
  2943. last_trustAnchorUpdate = now.tv_sec;
  2944. } catch (const PDNSException &pe) {
  2945. g_log<<Logger::Error<<"Unable to update Trust Anchors: "<<pe.reason<<endl;
  2946. }
  2947. }
  2948. }
  2949. s_running = false;
  2950. }
  2951. catch (const PDNSException& ae)
  2952. {
  2953. s_running = false;
  2954. g_log<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
  2955. throw;
  2956. }
  2957. catch (...)
  2958. {
  2959. s_running = false;
  2960. g_log<<Logger::Error<<"Uncaught exception in housekeeping thread"<<endl;
  2961. throw;
  2962. }
  2963. }
  2964. static void makeThreadPipes()
  2965. {
  2966. auto pipeBufferSize = ::arg().asNum("distribution-pipe-buffer-size");
  2967. if (pipeBufferSize > 0) {
  2968. g_log<<Logger::Info<<"Resizing the buffer of the distribution pipe to "<<pipeBufferSize<<endl;
  2969. }
  2970. /* thread 0 is the handler / SNMP, we start at 1 */
  2971. for(unsigned int n = 1; n <= (g_numWorkerThreads + g_numDistributorThreads); ++n) {
  2972. auto& threadInfos = s_threadInfos.at(n);
  2973. int fd[2];
  2974. if(pipe(fd) < 0)
  2975. unixDie("Creating pipe for inter-thread communications");
  2976. threadInfos.pipes.readToThread = fd[0];
  2977. threadInfos.pipes.writeToThread = fd[1];
  2978. if(pipe(fd) < 0)
  2979. unixDie("Creating pipe for inter-thread communications");
  2980. threadInfos.pipes.readFromThread = fd[0];
  2981. threadInfos.pipes.writeFromThread = fd[1];
  2982. if(pipe(fd) < 0)
  2983. unixDie("Creating pipe for inter-thread communications");
  2984. threadInfos.pipes.readQueriesToThread = fd[0];
  2985. threadInfos.pipes.writeQueriesToThread = fd[1];
  2986. if (pipeBufferSize > 0) {
  2987. if (!setPipeBufferSize(threadInfos.pipes.writeQueriesToThread, pipeBufferSize)) {
  2988. int err = errno;
  2989. g_log<<Logger::Warning<<"Error resizing the buffer of the distribution pipe for thread "<<n<<" to "<<pipeBufferSize<<": "<<strerror(err)<<endl;
  2990. auto existingSize = getPipeBufferSize(threadInfos.pipes.writeQueriesToThread);
  2991. if (existingSize > 0) {
  2992. g_log<<Logger::Warning<<"The current size of the distribution pipe's buffer for thread "<<n<<" is "<<existingSize<<endl;
  2993. }
  2994. }
  2995. }
  2996. if (!setNonBlocking(threadInfos.pipes.writeQueriesToThread)) {
  2997. unixDie("Making pipe for inter-thread communications non-blocking");
  2998. }
  2999. }
  3000. }
  3001. struct ThreadMSG
  3002. {
  3003. pipefunc_t func;
  3004. bool wantAnswer;
  3005. };
  3006. void broadcastFunction(const pipefunc_t& func)
  3007. {
  3008. /* This function might be called by the worker with t_id 0 during startup
  3009. for the initialization of ACLs and domain maps. After that it should only
  3010. be called by the handler. */
  3011. if (s_threadInfos.empty() && isHandlerThread()) {
  3012. /* the handler and distributors will call themselves below, but
  3013. during startup we get called while s_threadInfos has not been
  3014. populated yet to update the ACL or domain maps, so we need to
  3015. handle that case.
  3016. */
  3017. func();
  3018. }
  3019. unsigned int n = 0;
  3020. for (const auto& threadInfo : s_threadInfos) {
  3021. if(n++ == t_id) {
  3022. func(); // don't write to ourselves!
  3023. continue;
  3024. }
  3025. ThreadMSG* tmsg = new ThreadMSG();
  3026. tmsg->func = func;
  3027. tmsg->wantAnswer = true;
  3028. if(write(threadInfo.pipes.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
  3029. delete tmsg;
  3030. unixDie("write to thread pipe returned wrong size or error");
  3031. }
  3032. string* resp = nullptr;
  3033. if(read(threadInfo.pipes.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
  3034. unixDie("read from thread pipe returned wrong size or error");
  3035. if(resp) {
  3036. delete resp;
  3037. resp = nullptr;
  3038. }
  3039. }
  3040. }
  3041. static bool trySendingQueryToWorker(unsigned int target, ThreadMSG* tmsg)
  3042. {
  3043. auto& targetInfo = s_threadInfos[target];
  3044. if(!targetInfo.isWorker) {
  3045. g_log<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl;
  3046. exit(1);
  3047. }
  3048. const auto& tps = targetInfo.pipes;
  3049. ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
  3050. if (written > 0) {
  3051. if (static_cast<size_t>(written) != sizeof(tmsg)) {
  3052. delete tmsg;
  3053. unixDie("write to thread pipe returned wrong size or error");
  3054. }
  3055. }
  3056. else {
  3057. int error = errno;
  3058. if (error == EAGAIN || error == EWOULDBLOCK) {
  3059. return false;
  3060. } else {
  3061. delete tmsg;
  3062. unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
  3063. }
  3064. }
  3065. ++targetInfo.numberOfDistributedQueries;
  3066. return true;
  3067. }
  3068. static unsigned int getWorkerLoad(size_t workerIdx)
  3069. {
  3070. const auto mt = s_threadInfos[/* skip handler */ 1 + g_numDistributorThreads + workerIdx].mt;
  3071. if (mt != nullptr) {
  3072. return mt->numProcesses();
  3073. }
  3074. return 0;
  3075. }
  3076. static unsigned int selectWorker(unsigned int hash)
  3077. {
  3078. if (s_balancingFactor == 0) {
  3079. return /* skip handler */ 1 + g_numDistributorThreads + (hash % g_numWorkerThreads);
  3080. }
  3081. /* we start with one, representing the query we are currently handling */
  3082. double currentLoad = 1;
  3083. std::vector<unsigned int> load(g_numWorkerThreads);
  3084. for (size_t idx = 0; idx < g_numWorkerThreads; idx++) {
  3085. load[idx] = getWorkerLoad(idx);
  3086. currentLoad += load[idx];
  3087. // cerr<<"load for worker "<<idx<<" is "<<load[idx]<<endl;
  3088. }
  3089. double targetLoad = (currentLoad / g_numWorkerThreads) * s_balancingFactor;
  3090. // cerr<<"total load is "<<currentLoad<<", number of workers is "<<g_numWorkerThreads<<", target load is "<<targetLoad<<endl;
  3091. unsigned int worker = hash % g_numWorkerThreads;
  3092. /* at least one server has to be at or below the average load */
  3093. if (load[worker] > targetLoad) {
  3094. ++g_stats.rebalancedQueries;
  3095. do {
  3096. // cerr<<"worker "<<worker<<" is above the target load, selecting another one"<<endl;
  3097. worker = (worker + 1) % g_numWorkerThreads;
  3098. }
  3099. while(load[worker] > targetLoad);
  3100. }
  3101. return /* skip handler */ 1 + g_numDistributorThreads + worker;
  3102. }
  3103. // This function is only called by the distributor threads, when pdns-distributes-queries is set
  3104. void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
  3105. {
  3106. if (!isDistributorThread()) {
  3107. g_log<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
  3108. exit(1);
  3109. }
  3110. unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
  3111. unsigned int target = selectWorker(hash);
  3112. ThreadMSG* tmsg = new ThreadMSG();
  3113. tmsg->func = func;
  3114. tmsg->wantAnswer = false;
  3115. if (!trySendingQueryToWorker(target, tmsg)) {
  3116. /* if this function failed but did not raise an exception, it means that the pipe
  3117. was full, let's try another one */
  3118. unsigned int newTarget = 0;
  3119. do {
  3120. newTarget = /* skip handler */ 1 + g_numDistributorThreads + dns_random(g_numWorkerThreads);
  3121. } while (newTarget == target);
  3122. if (!trySendingQueryToWorker(newTarget, tmsg)) {
  3123. g_stats.queryPipeFullDrops++;
  3124. delete tmsg;
  3125. }
  3126. }
  3127. }
  3128. static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
  3129. {
  3130. ThreadMSG* tmsg = nullptr;
  3131. if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
  3132. unixDie("read from thread pipe returned wrong size or error");
  3133. }
  3134. void *resp=0;
  3135. try {
  3136. resp = tmsg->func();
  3137. }
  3138. catch(std::exception& e) {
  3139. if(g_logCommonErrors)
  3140. g_log<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
  3141. }
  3142. catch(PDNSException& e) {
  3143. if(g_logCommonErrors)
  3144. g_log<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
  3145. }
  3146. if(tmsg->wantAnswer) {
  3147. const auto& threadInfo = s_threadInfos.at(t_id);
  3148. if(write(threadInfo.pipes.writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
  3149. delete tmsg;
  3150. unixDie("write to thread pipe returned wrong size or error");
  3151. }
  3152. }
  3153. delete tmsg;
  3154. }
  3155. template<class T> void *voider(const boost::function<T*()>& func)
  3156. {
  3157. return func();
  3158. }
  3159. static vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
  3160. {
  3161. a.insert(a.end(), b.begin(), b.end());
  3162. return a;
  3163. }
  3164. static vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
  3165. {
  3166. a.insert(a.end(), b.begin(), b.end());
  3167. return a;
  3168. }
  3169. /*
  3170. This function should only be called by the handler to gather metrics, wipe the cache,
  3171. reload the Lua script (not the Lua config) or change the current trace regex,
  3172. and by the SNMP thread to gather metrics. */
  3173. template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
  3174. {
  3175. if (!isHandlerThread()) {
  3176. g_log<<Logger::Error<<"broadcastAccFunction has been called by a worker ("<<t_id<<")"<<endl;
  3177. exit(1);
  3178. }
  3179. unsigned int n = 0;
  3180. T ret=T();
  3181. for (const auto& threadInfo : s_threadInfos) {
  3182. if (n++ == t_id) {
  3183. continue;
  3184. }
  3185. const auto& tps = threadInfo.pipes;
  3186. ThreadMSG* tmsg = new ThreadMSG();
  3187. tmsg->func = [func]{ return voider<T>(func); };
  3188. tmsg->wantAnswer = true;
  3189. if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
  3190. delete tmsg;
  3191. unixDie("write to thread pipe returned wrong size or error");
  3192. }
  3193. T* resp = nullptr;
  3194. if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
  3195. unixDie("read from thread pipe returned wrong size or error");
  3196. if(resp) {
  3197. ret += *resp;
  3198. delete resp;
  3199. resp = nullptr;
  3200. }
  3201. }
  3202. return ret;
  3203. }
  3204. template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
  3205. template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
  3206. template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
  3207. template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
  3208. template ThreadTimes broadcastAccFunction(const boost::function<ThreadTimes*()>& fun);
  3209. static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
  3210. {
  3211. try {
  3212. string remote;
  3213. string msg=s_rcc.recv(&remote);
  3214. RecursorControlParser rcp;
  3215. RecursorControlParser::func_t* command;
  3216. string answer=rcp.getAnswer(msg, &command);
  3217. // If we are inside a chroot, we need to strip
  3218. if (!arg()["chroot"].empty()) {
  3219. size_t len = arg()["chroot"].length();
  3220. remote = remote.substr(len);
  3221. }
  3222. s_rcc.send(answer, &remote);
  3223. command();
  3224. }
  3225. catch(const std::exception& e) {
  3226. g_log<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
  3227. }
  3228. catch(const PDNSException& ae) {
  3229. g_log<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
  3230. }
  3231. }
  3232. static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
  3233. {
  3234. PacketID* pident=any_cast<PacketID>(&var);
  3235. // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
  3236. shared_array<char> buffer(new char[pident->inNeeded]);
  3237. ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
  3238. if(ret > 0) {
  3239. pident->inMSG.append(&buffer[0], &buffer[ret]);
  3240. pident->inNeeded-=(size_t)ret;
  3241. if(!pident->inNeeded || pident->inIncompleteOkay) {
  3242. // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
  3243. PacketID pid=*pident;
  3244. string msg=pident->inMSG;
  3245. t_fdm->removeReadFD(fd);
  3246. MT->sendEvent(pid, &msg);
  3247. }
  3248. else {
  3249. // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
  3250. }
  3251. }
  3252. else {
  3253. PacketID tmp=*pident;
  3254. t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
  3255. string empty;
  3256. MT->sendEvent(tmp, &empty); // this conveys error status
  3257. }
  3258. }
  3259. static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
  3260. {
  3261. PacketID* pid=any_cast<PacketID>(&var);
  3262. ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
  3263. if(ret > 0) {
  3264. pid->outPos+=(ssize_t)ret;
  3265. if(pid->outPos==pid->outMSG.size()) {
  3266. PacketID tmp=*pid;
  3267. t_fdm->removeWriteFD(fd);
  3268. MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
  3269. }
  3270. }
  3271. else { // error or EOF
  3272. PacketID tmp(*pid);
  3273. t_fdm->removeWriteFD(fd);
  3274. string sent;
  3275. MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
  3276. }
  3277. }
  3278. // resend event to everybody chained onto it
  3279. static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
  3280. {
  3281. // We close the chain for new entries, since they won't be processed anyway
  3282. iter->key.closed = true;
  3283. if(iter->key.chain.empty())
  3284. return;
  3285. // cerr<<"doResends called!\n";
  3286. for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
  3287. resend.fd=-1;
  3288. resend.id=*i;
  3289. // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
  3290. MT->sendEvent(resend, &content);
  3291. g_stats.chainResends++;
  3292. }
  3293. }
  3294. static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
  3295. {
  3296. PacketID pid=any_cast<PacketID>(var);
  3297. ssize_t len;
  3298. std::string packet;
  3299. packet.resize(g_outgoingEDNSBufsize);
  3300. ComboAddress fromaddr;
  3301. socklen_t addrlen=sizeof(fromaddr);
  3302. len=recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr *)&fromaddr, &addrlen);
  3303. if(len < (ssize_t) sizeof(dnsheader)) {
  3304. if(len < 0)
  3305. ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
  3306. else {
  3307. g_stats.serverParseError++;
  3308. if(g_logCommonErrors)
  3309. g_log<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
  3310. ": packet smaller than DNS header"<<endl;
  3311. }
  3312. t_udpclientsocks->returnSocket(fd);
  3313. string empty;
  3314. MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
  3315. if(iter != MT->d_waiters.end())
  3316. doResends(iter, pid, empty);
  3317. MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
  3318. return;
  3319. }
  3320. packet.resize(len);
  3321. dnsheader dh;
  3322. memcpy(&dh, &packet.at(0), sizeof(dh));
  3323. PacketID pident;
  3324. pident.remote=fromaddr;
  3325. pident.id=dh.id;
  3326. pident.fd=fd;
  3327. if(!dh.qr && g_logCommonErrors) {
  3328. g_log<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
  3329. }
  3330. if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
  3331. !dh.qr) { // one weird server
  3332. pident.domain.clear();
  3333. pident.type = 0;
  3334. }
  3335. else {
  3336. try {
  3337. if(len > 12)
  3338. pident.domain=DNSName(&packet.at(0), len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
  3339. }
  3340. catch(std::exception& e) {
  3341. g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
  3342. g_log<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
  3343. return;
  3344. }
  3345. }
  3346. MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
  3347. if(iter != MT->d_waiters.end()) {
  3348. doResends(iter, pident, packet);
  3349. }
  3350. retryWithName:
  3351. if(!MT->sendEvent(pident, &packet)) {
  3352. /* we did not find a match for this response, something is wrong */
  3353. // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
  3354. for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
  3355. if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
  3356. pident.domain == mthread->key.domain) {
  3357. mthread->key.nearMisses++;
  3358. }
  3359. // be a bit paranoid here since we're weakening our matching
  3360. if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
  3361. pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
  3362. // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
  3363. pident.domain = mthread->key.domain;
  3364. pident.type = mthread->key.type;
  3365. goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
  3366. }
  3367. }
  3368. g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
  3369. if(g_logCommonErrors) {
  3370. g_log<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
  3371. }
  3372. }
  3373. else if(fd >= 0) {
  3374. /* we either found a waiter (1) or encountered an issue (-1), it's up to us to clean the socket anyway */
  3375. t_udpclientsocks->returnSocket(fd);
  3376. }
  3377. }
  3378. static FDMultiplexer* getMultiplexer()
  3379. {
  3380. FDMultiplexer* ret;
  3381. for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
  3382. try {
  3383. ret=i.second();
  3384. return ret;
  3385. }
  3386. catch(FDMultiplexerException &fe) {
  3387. g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
  3388. }
  3389. catch(...) {
  3390. g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
  3391. }
  3392. }
  3393. g_log<<Logger::Error<<"No working multiplexer found!"<<endl;
  3394. exit(1);
  3395. }
  3396. static string* doReloadLuaScript()
  3397. {
  3398. string fname= ::arg()["lua-dns-script"];
  3399. try {
  3400. if(fname.empty()) {
  3401. t_pdl.reset();
  3402. g_log<<Logger::Info<<t_id<<" Unloaded current lua script"<<endl;
  3403. return new string("unloaded\n");
  3404. }
  3405. else {
  3406. t_pdl = std::make_shared<RecursorLua4>();
  3407. t_pdl->loadFile(fname);
  3408. }
  3409. }
  3410. catch(std::exception& e) {
  3411. g_log<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
  3412. return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
  3413. }
  3414. g_log<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
  3415. return new string("(re)loaded '"+fname+"'\n");
  3416. }
  3417. string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
  3418. {
  3419. if(begin != end)
  3420. ::arg().set("lua-dns-script") = *begin;
  3421. return broadcastAccFunction<string>(doReloadLuaScript);
  3422. }
  3423. static string* pleaseUseNewTraceRegex(const std::string& newRegex)
  3424. try
  3425. {
  3426. if(newRegex.empty()) {
  3427. t_traceRegex.reset();
  3428. return new string("unset\n");
  3429. }
  3430. else {
  3431. t_traceRegex = std::make_shared<Regex>(newRegex);
  3432. return new string("ok\n");
  3433. }
  3434. }
  3435. catch(PDNSException& ae)
  3436. {
  3437. return new string(ae.reason+"\n");
  3438. }
  3439. string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
  3440. {
  3441. return broadcastAccFunction<string>([=]{ return pleaseUseNewTraceRegex(begin!=end ? *begin : ""); });
  3442. }
  3443. static void checkLinuxIPv6Limits()
  3444. {
  3445. #ifdef __linux__
  3446. string line;
  3447. if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
  3448. int lim=std::stoi(line);
  3449. if(lim < 16384) {
  3450. g_log<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
  3451. }
  3452. }
  3453. #endif
  3454. }
  3455. static void checkOrFixFDS()
  3456. {
  3457. unsigned int availFDs=getFilenumLimit();
  3458. unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
  3459. if(wantFDs > availFDs) {
  3460. unsigned int hardlimit= getFilenumLimit(true);
  3461. if(hardlimit >= wantFDs) {
  3462. setFilenumLimit(wantFDs);
  3463. g_log<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
  3464. }
  3465. else {
  3466. int newval = (hardlimit - 25) / g_numWorkerThreads;
  3467. g_log<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
  3468. g_maxMThreads = newval;
  3469. setFilenumLimit(hardlimit);
  3470. }
  3471. }
  3472. }
  3473. static void* recursorThread(unsigned int tid, const string& threadName);
  3474. static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
  3475. {
  3476. t_allowFrom = ng;
  3477. return nullptr;
  3478. }
  3479. int g_argc;
  3480. char** g_argv;
  3481. void parseACLs()
  3482. {
  3483. static bool l_initialized;
  3484. if(l_initialized) { // only reload configuration file on second call
  3485. string configname=::arg()["config-dir"]+"/recursor.conf";
  3486. if(::arg()["config-name"]!="") {
  3487. configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
  3488. }
  3489. cleanSlashes(configname);
  3490. if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
  3491. throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
  3492. ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
  3493. ::arg().preParseFile(configname.c_str(), "include-dir");
  3494. ::arg().preParse(g_argc, g_argv, "include-dir");
  3495. // then process includes
  3496. std::vector<std::string> extraConfigs;
  3497. ::arg().gatherIncludes(extraConfigs);
  3498. for(const std::string& fn : extraConfigs) {
  3499. if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
  3500. throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
  3501. if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
  3502. throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
  3503. }
  3504. ::arg().preParse(g_argc, g_argv, "allow-from-file");
  3505. ::arg().preParse(g_argc, g_argv, "allow-from");
  3506. }
  3507. std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
  3508. std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
  3509. if(!::arg()["allow-from-file"].empty()) {
  3510. string line;
  3511. ifstream ifs(::arg()["allow-from-file"].c_str());
  3512. if(!ifs) {
  3513. throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
  3514. }
  3515. string::size_type pos;
  3516. while(getline(ifs,line)) {
  3517. pos=line.find('#');
  3518. if(pos!=string::npos)
  3519. line.resize(pos);
  3520. trim(line);
  3521. if(line.empty())
  3522. continue;
  3523. allowFrom->addMask(line);
  3524. }
  3525. g_log<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
  3526. }
  3527. else if(!::arg()["allow-from"].empty()) {
  3528. vector<string> ips;
  3529. stringtok(ips, ::arg()["allow-from"], ", ");
  3530. g_log<<Logger::Warning<<"Only allowing queries from: ";
  3531. for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
  3532. allowFrom->addMask(*i);
  3533. if(i!=ips.begin())
  3534. g_log<<Logger::Warning<<", ";
  3535. g_log<<Logger::Warning<<*i;
  3536. }
  3537. g_log<<Logger::Warning<<endl;
  3538. }
  3539. else {
  3540. if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
  3541. g_log<<Logger::Warning<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
  3542. allowFrom = nullptr;
  3543. }
  3544. g_initialAllowFrom = allowFrom;
  3545. broadcastFunction([=]{ return pleaseSupplantACLs(allowFrom); });
  3546. oldAllowFrom = nullptr;
  3547. l_initialized = true;
  3548. }
  3549. static void setupDelegationOnly()
  3550. {
  3551. vector<string> parts;
  3552. stringtok(parts, ::arg()["delegation-only"], ", \t");
  3553. for(const auto& p : parts) {
  3554. SyncRes::addDelegationOnly(DNSName(p));
  3555. }
  3556. }
  3557. static std::map<unsigned int, std::set<int> > parseCPUMap()
  3558. {
  3559. std::map<unsigned int, std::set<int> > result;
  3560. const std::string value = ::arg()["cpu-map"];
  3561. if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
  3562. g_log<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
  3563. return result;
  3564. }
  3565. std::vector<std::string> parts;
  3566. stringtok(parts, value, " \t");
  3567. for(const auto& part : parts) {
  3568. if (part.find('=') == string::npos)
  3569. continue;
  3570. try {
  3571. auto headers = splitField(part, '=');
  3572. trim(headers.first);
  3573. trim(headers.second);
  3574. unsigned int threadId = pdns_stou(headers.first);
  3575. std::vector<std::string> cpus;
  3576. stringtok(cpus, headers.second, ",");
  3577. for(const auto& cpu : cpus) {
  3578. int cpuId = std::stoi(cpu);
  3579. result[threadId].insert(cpuId);
  3580. }
  3581. }
  3582. catch(const std::exception& e) {
  3583. g_log<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
  3584. }
  3585. }
  3586. return result;
  3587. }
  3588. static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
  3589. {
  3590. const auto& cpuMapping = cpusMap.find(n);
  3591. if (cpuMapping != cpusMap.cend()) {
  3592. int rc = mapThreadToCPUList(tid, cpuMapping->second);
  3593. if (rc == 0) {
  3594. g_log<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
  3595. for (const auto cpu : cpuMapping->second) {
  3596. g_log<<Logger::Info<<" "<<cpu;
  3597. }
  3598. g_log<<Logger::Info<<endl;
  3599. }
  3600. else {
  3601. g_log<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
  3602. for (const auto cpu : cpuMapping->second) {
  3603. g_log<<Logger::Info<<" "<<cpu;
  3604. }
  3605. g_log<<Logger::Info<<strerror(rc)<<endl;
  3606. }
  3607. }
  3608. }
  3609. #ifdef NOD_ENABLED
  3610. static void setupNODThread()
  3611. {
  3612. if (g_nodEnabled) {
  3613. uint32_t num_cells = ::arg().asNum("new-domain-db-size");
  3614. t_nodDBp = std::make_shared<nod::NODDB>(num_cells);
  3615. try {
  3616. t_nodDBp->setCacheDir(::arg()["new-domain-history-dir"]);
  3617. }
  3618. catch (const PDNSException& e) {
  3619. g_log<<Logger::Error<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl;
  3620. _exit(1);
  3621. }
  3622. if (!t_nodDBp->init()) {
  3623. g_log<<Logger::Error<<"Could not initialize domain tracking"<<endl;
  3624. _exit(1);
  3625. }
  3626. std::thread t(nod::NODDB::startHousekeepingThread, t_nodDBp, std::this_thread::get_id());
  3627. t.detach();
  3628. g_nod_pbtag = ::arg()["new-domain-pb-tag"];
  3629. }
  3630. if (g_udrEnabled) {
  3631. uint32_t num_cells = ::arg().asNum("unique-response-db-size");
  3632. t_udrDBp = std::make_shared<nod::UniqueResponseDB>(num_cells);
  3633. try {
  3634. t_udrDBp->setCacheDir(::arg()["unique-response-history-dir"]);
  3635. }
  3636. catch (const PDNSException& e) {
  3637. g_log<<Logger::Error<<"unique-response-history-dir (" << ::arg()["unique-response-history-dir"] << ") is not readable or does not exist"<<endl;
  3638. _exit(1);
  3639. }
  3640. if (!t_udrDBp->init()) {
  3641. g_log<<Logger::Error<<"Could not initialize unique response tracking"<<endl;
  3642. _exit(1);
  3643. }
  3644. std::thread t(nod::UniqueResponseDB::startHousekeepingThread, t_udrDBp, std::this_thread::get_id());
  3645. t.detach();
  3646. g_udr_pbtag = ::arg()["unique-response-pb-tag"];
  3647. }
  3648. }
  3649. static void parseNODWhitelist(const std::string& wlist)
  3650. {
  3651. vector<string> parts;
  3652. stringtok(parts, wlist, ",; ");
  3653. for(const auto& a : parts) {
  3654. g_nodDomainWL.add(DNSName(a));
  3655. }
  3656. }
  3657. static void setupNODGlobal()
  3658. {
  3659. // Setup NOD subsystem
  3660. g_nodEnabled = ::arg().mustDo("new-domain-tracking");
  3661. g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]);
  3662. g_nodLog = ::arg().mustDo("new-domain-log");
  3663. parseNODWhitelist(::arg()["new-domain-whitelist"]);
  3664. // Setup Unique DNS Response subsystem
  3665. g_udrEnabled = ::arg().mustDo("unique-response-tracking");
  3666. g_udrLog = ::arg().mustDo("unique-response-log");
  3667. }
  3668. #endif /* NOD_ENABLED */
  3669. static void checkSocketDir(void)
  3670. {
  3671. struct stat st;
  3672. string dir(::arg()["socket-dir"]);
  3673. string msg;
  3674. if (stat(dir.c_str(), &st) == -1) {
  3675. msg = "it does not exist or cannot access";
  3676. }
  3677. else if (!S_ISDIR(st.st_mode)) {
  3678. msg = "it is not a directory";
  3679. }
  3680. else if (access(dir.c_str(), R_OK | W_OK | X_OK) != 0) {
  3681. msg = "cannot read, write or search";
  3682. } else {
  3683. return;
  3684. }
  3685. g_log << Logger::Error << "Problem with socket directory " << dir << ": " << msg << "; see https://docs.powerdns.com/recursor/upgrade.html#x-to-4-3-0-or-master" << endl;
  3686. _exit(1);
  3687. }
  3688. static int serviceMain(int argc, char*argv[])
  3689. {
  3690. int ret = EXIT_SUCCESS;
  3691. g_log.setName(s_programname);
  3692. g_log.disableSyslog(::arg().mustDo("disable-syslog"));
  3693. g_log.setTimestamps(::arg().mustDo("log-timestamp"));
  3694. if(!::arg()["logging-facility"].empty()) {
  3695. int val=logFacilityToLOG(::arg().asNum("logging-facility") );
  3696. if(val >= 0)
  3697. g_log.setFacility(val);
  3698. else
  3699. g_log<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
  3700. }
  3701. showProductVersion();
  3702. g_disthashseed=dns_random(0xffffffff);
  3703. checkLinuxIPv6Limits();
  3704. try {
  3705. pdns::parseQueryLocalAddress(::arg()["query-local-address"]);
  3706. if (!::arg()["query-local-address6"].empty()) {
  3707. // TODO remove in 4.5.0
  3708. g_log<<Logger::Warning<<"query-local-address6 is deprecated and will be removed in a future version. Please use query-local-address for IPv6 addresses as well"<<endl;
  3709. pdns::parseQueryLocalAddress(::arg()["query-local-address6"]);
  3710. }
  3711. }
  3712. catch(std::exception& e) {
  3713. g_log<<Logger::Error<<"Assigning local query addresses: "<<e.what();
  3714. exit(99);
  3715. }
  3716. if(pdns::isQueryLocalAddressFamilyEnabled(AF_INET)) {
  3717. SyncRes::s_doIPv4=true;
  3718. g_log<<Logger::Warning<<"Enabling IPv4 transport for outgoing queries"<<endl;
  3719. }
  3720. else {
  3721. g_log<<Logger::Warning<<"NOT using IPv4 for outgoing queries - add an IPv4 address (like '0.0.0.0') to query-local-address to enable"<<endl;
  3722. }
  3723. if(pdns::isQueryLocalAddressFamilyEnabled(AF_INET6)) {
  3724. SyncRes::s_doIPv6=true;
  3725. g_log<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
  3726. }
  3727. else {
  3728. g_log<<Logger::Warning<<"NOT using IPv6 for outgoing queries - add an IPv6 address (like '::') to query-local-address to enable"<<endl;
  3729. }
  3730. if (!SyncRes::s_doIPv6 && !SyncRes::s_doIPv4) {
  3731. g_log<<Logger::Error<<"No outgoing addresses configured! Can not continue"<<endl;
  3732. exit(99);
  3733. }
  3734. // keep this ABOVE loadRecursorLuaConfig!
  3735. if(::arg()["dnssec"]=="off")
  3736. g_dnssecmode=DNSSECMode::Off;
  3737. else if(::arg()["dnssec"]=="process-no-validate")
  3738. g_dnssecmode=DNSSECMode::ProcessNoValidate;
  3739. else if(::arg()["dnssec"]=="process")
  3740. g_dnssecmode=DNSSECMode::Process;
  3741. else if(::arg()["dnssec"]=="validate")
  3742. g_dnssecmode=DNSSECMode::ValidateAll;
  3743. else if(::arg()["dnssec"]=="log-fail")
  3744. g_dnssecmode=DNSSECMode::ValidateForLog;
  3745. else {
  3746. g_log<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
  3747. exit(1);
  3748. }
  3749. g_signatureInceptionSkew = ::arg().asNum("signature-inception-skew");
  3750. if (g_signatureInceptionSkew < 0) {
  3751. g_log<<Logger::Error<<"A negative value for 'signature-inception-skew' is not allowed"<<endl;
  3752. exit(1);
  3753. }
  3754. g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
  3755. g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
  3756. g_maxCacheEntries = ::arg().asNum("max-cache-entries");
  3757. g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
  3758. luaConfigDelayedThreads delayedLuaThreads;
  3759. try {
  3760. loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads);
  3761. }
  3762. catch (PDNSException &e) {
  3763. g_log<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
  3764. exit(1);
  3765. }
  3766. parseACLs();
  3767. initPublicSuffixList(::arg()["public-suffix-list-file"]);
  3768. if(!::arg()["dont-query"].empty()) {
  3769. vector<string> ips;
  3770. stringtok(ips, ::arg()["dont-query"], ", ");
  3771. ips.push_back("0.0.0.0");
  3772. ips.push_back("::");
  3773. g_log<<Logger::Warning<<"Will not send queries to: ";
  3774. for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
  3775. SyncRes::addDontQuery(*i);
  3776. if(i!=ips.begin())
  3777. g_log<<Logger::Warning<<", ";
  3778. g_log<<Logger::Warning<<*i;
  3779. }
  3780. g_log<<Logger::Warning<<endl;
  3781. }
  3782. g_quiet=::arg().mustDo("quiet");
  3783. /* this needs to be done before parseACLs(), which call broadcastFunction() */
  3784. g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
  3785. if(g_weDistributeQueries) {
  3786. g_log<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
  3787. }
  3788. setupDelegationOnly();
  3789. g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
  3790. if(::arg()["trace"]=="fail") {
  3791. SyncRes::setDefaultLogMode(SyncRes::Store);
  3792. }
  3793. else if(::arg().mustDo("trace")) {
  3794. SyncRes::setDefaultLogMode(SyncRes::Log);
  3795. ::arg().set("quiet")="no";
  3796. g_quiet=false;
  3797. g_dnssecLOG=true;
  3798. }
  3799. string myHostname = getHostname();
  3800. if (myHostname == "UNKNOWN"){
  3801. g_log<<Logger::Warning<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl;
  3802. myHostname = "";
  3803. }
  3804. SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
  3805. SyncRes::s_minimumECSTTL = ::arg().asNum("ecs-minimum-ttl-override");
  3806. SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
  3807. SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
  3808. SyncRes::s_maxbogusttl=::arg().asNum("max-cache-bogus-ttl");
  3809. SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
  3810. SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
  3811. // Cap the packetcache-servfail-ttl to the packetcache-ttl
  3812. uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
  3813. SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
  3814. SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
  3815. SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
  3816. SyncRes::s_serverID=::arg()["server-id"];
  3817. SyncRes::s_maxqperq=::arg().asNum("max-qperq");
  3818. SyncRes::s_maxnsaddressqperq=::arg().asNum("max-ns-address-qperq");
  3819. SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
  3820. SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
  3821. SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
  3822. if(SyncRes::s_serverID.empty()) {
  3823. SyncRes::s_serverID = myHostname;
  3824. }
  3825. SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
  3826. SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
  3827. SyncRes::clearECSStats();
  3828. SyncRes::s_ecsipv4cachelimit = ::arg().asNum("ecs-ipv4-cache-bits");
  3829. SyncRes::s_ecsipv6cachelimit = ::arg().asNum("ecs-ipv6-cache-bits");
  3830. SyncRes::s_ecscachelimitttl = ::arg().asNum("ecs-cache-limit-ttl");
  3831. SyncRes::s_qnameminimization = ::arg().mustDo("qname-minimization");
  3832. if (SyncRes::s_qnameminimization) {
  3833. // With an empty cache, a rev ipv6 query with dnssec enabled takes
  3834. // almost 100 queries. Default maxqperq is 60.
  3835. SyncRes::s_maxqperq = std::max(SyncRes::s_maxqperq, static_cast<unsigned int>(100));
  3836. }
  3837. SyncRes::s_hardenNXD = SyncRes::HardenNXD::DNSSEC;
  3838. string value = ::arg()["nothing-below-nxdomain"];
  3839. if (value == "yes") {
  3840. SyncRes::s_hardenNXD = SyncRes::HardenNXD::Yes;
  3841. } else if (value == "no") {
  3842. SyncRes::s_hardenNXD = SyncRes::HardenNXD::No;
  3843. } else if (value != "dnssec") {
  3844. g_log << Logger::Error << "Unknown nothing-below-nxdomain mode: " << value << endl;
  3845. exit(1);
  3846. }
  3847. if (!::arg().isEmpty("ecs-scope-zero-address")) {
  3848. ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
  3849. SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
  3850. }
  3851. else {
  3852. Netmask nm;
  3853. bool done = false;
  3854. auto addr = pdns::getNonAnyQueryLocalAddress(AF_INET);
  3855. if (addr.sin4.sin_family != 0) {
  3856. nm = Netmask(addr, 32);
  3857. done = true;
  3858. }
  3859. if (!done) {
  3860. addr = pdns::getNonAnyQueryLocalAddress(AF_INET6);
  3861. if (addr.sin4.sin_family != 0) {
  3862. nm = Netmask(addr, 128);
  3863. done = true;
  3864. }
  3865. }
  3866. if (!done) {
  3867. nm = Netmask(ComboAddress("127.0.0.1"), 32);
  3868. }
  3869. SyncRes::setECSScopeZeroAddress(nm);
  3870. }
  3871. SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
  3872. SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
  3873. g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
  3874. g_XPFAcl.toMasks(::arg()["xpf-allow-from"]);
  3875. g_xpfRRCode = ::arg().asNum("xpf-rr-code");
  3876. g_proxyProtocolACL.toMasks(::arg()["proxy-protocol-from"]);
  3877. g_proxyProtocolMaximumSize = ::arg().asNum("proxy-protocol-maximum-size");
  3878. if (!::arg()["dns64-prefix"].empty()) {
  3879. try {
  3880. auto dns64Prefix = Netmask(::arg()["dns64-prefix"]);
  3881. if (dns64Prefix.getBits() != 96) {
  3882. g_log << Logger::Error << "Invalid prefix for 'dns64-prefix', the current implementation only supports /96 prefixes: " << ::arg()["dns64-prefix"] << endl;
  3883. exit(1);
  3884. }
  3885. g_dns64Prefix = dns64Prefix.getNetwork();
  3886. g_dns64PrefixReverse = reverseNameFromIP(*g_dns64Prefix);
  3887. /* /96 is 24 nibbles + 2 for "ip6.arpa." */
  3888. while (g_dns64PrefixReverse.countLabels() > 26) {
  3889. g_dns64PrefixReverse.chopOff();
  3890. }
  3891. }
  3892. catch (const NetmaskException& ne) {
  3893. g_log << Logger::Error << "Invalid prefix '" << ::arg()["dns64-prefix"] << "' for 'dns64-prefix': " << ne.reason << endl;
  3894. exit(1);
  3895. }
  3896. }
  3897. g_networkTimeoutMsec = ::arg().asNum("network-timeout");
  3898. g_initialDomainMap = parseAuthAndForwards();
  3899. g_latencyStatSize=::arg().asNum("latency-statistic-size");
  3900. g_logCommonErrors=::arg().mustDo("log-common-errors");
  3901. g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
  3902. g_anyToTcp = ::arg().mustDo("any-to-tcp");
  3903. g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
  3904. g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
  3905. g_numDistributorThreads = ::arg().asNum("distributor-threads");
  3906. g_numWorkerThreads = ::arg().asNum("threads");
  3907. if (g_numWorkerThreads < 1) {
  3908. g_log<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
  3909. g_numWorkerThreads = 1;
  3910. }
  3911. g_numThreads = g_numDistributorThreads + g_numWorkerThreads;
  3912. g_maxMThreads = ::arg().asNum("max-mthreads");
  3913. int64_t maxInFlight = ::arg().asNum("max-concurrent-requests-per-tcp-connection");
  3914. if (maxInFlight < 1 || maxInFlight > USHRT_MAX || maxInFlight >= g_maxMThreads) {
  3915. g_log<<Logger::Warning<<"Asked to run with illegal max-concurrent-requests-per-tcp-connection, setting to default (10)"<<endl;
  3916. TCPConnection::s_maxInFlight = 10;
  3917. } else {
  3918. TCPConnection::s_maxInFlight = maxInFlight;
  3919. }
  3920. g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
  3921. g_statisticsInterval = ::arg().asNum("statistics-interval");
  3922. {
  3923. SuffixMatchNode dontThrottleNames;
  3924. vector<string> parts;
  3925. stringtok(parts, ::arg()["dont-throttle-names"], " ,");
  3926. for (const auto &p : parts) {
  3927. dontThrottleNames.add(DNSName(p));
  3928. }
  3929. g_dontThrottleNames.setState(std::move(dontThrottleNames));
  3930. parts.clear();
  3931. NetmaskGroup dontThrottleNetmasks;
  3932. stringtok(parts, ::arg()["dont-throttle-netmasks"], " ,");
  3933. for (const auto &p : parts) {
  3934. dontThrottleNetmasks.addMask(Netmask(p));
  3935. }
  3936. g_dontThrottleNetmasks.setState(std::move(dontThrottleNetmasks));
  3937. }
  3938. s_balancingFactor = ::arg().asDouble("distribution-load-factor");
  3939. if (s_balancingFactor != 0.0 && s_balancingFactor < 1.0) {
  3940. s_balancingFactor = 0.0;
  3941. g_log<<Logger::Warning<<"Asked to run with a distribution-load-factor below 1.0, disabling it instead"<<endl;
  3942. }
  3943. #ifdef SO_REUSEPORT
  3944. g_reusePort = ::arg().mustDo("reuseport");
  3945. #endif
  3946. s_threadInfos.resize(g_numDistributorThreads + g_numWorkerThreads + /* handler */ 1);
  3947. if (g_reusePort) {
  3948. if (g_weDistributeQueries) {
  3949. /* first thread is the handler, then distributors */
  3950. for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
  3951. auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
  3952. auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
  3953. makeUDPServerSockets(deferredAdds);
  3954. makeTCPServerSockets(deferredAdds, tcpSockets);
  3955. }
  3956. }
  3957. else {
  3958. /* first thread is the handler, there is no distributor here and workers are accepting queries */
  3959. for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
  3960. auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
  3961. auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
  3962. makeUDPServerSockets(deferredAdds);
  3963. makeTCPServerSockets(deferredAdds, tcpSockets);
  3964. }
  3965. }
  3966. }
  3967. else {
  3968. std::set<int> tcpSockets;
  3969. /* we don't have reuseport so we can only open one socket per
  3970. listening addr:port and everyone will listen on it */
  3971. makeUDPServerSockets(g_deferredAdds);
  3972. makeTCPServerSockets(g_deferredAdds, tcpSockets);
  3973. /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
  3974. needs to listen to the shared sockets */
  3975. if (g_weDistributeQueries) {
  3976. /* first thread is the handler, then distributors */
  3977. for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
  3978. s_threadInfos.at(threadId).tcpSockets = tcpSockets;
  3979. }
  3980. }
  3981. else {
  3982. /* first thread is the handler, there is no distributor here and workers are accepting queries */
  3983. for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
  3984. s_threadInfos.at(threadId).tcpSockets = tcpSockets;
  3985. }
  3986. }
  3987. }
  3988. #ifdef NOD_ENABLED
  3989. // Setup newly observed domain globals
  3990. setupNODGlobal();
  3991. #endif /* NOD_ENABLED */
  3992. int forks;
  3993. for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
  3994. if(!fork()) // we are child
  3995. break;
  3996. }
  3997. if(::arg().mustDo("daemon")) {
  3998. g_log<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
  3999. g_log.toConsole(Logger::Critical);
  4000. daemonize();
  4001. }
  4002. if(Utility::getpid() == 1) {
  4003. /* We are running as pid 1, register sigterm and sigint handler
  4004. The Linux kernel will handle SIGTERM and SIGINT for all processes, except PID 1.
  4005. It assumes that the processes running as pid 1 is an "init" like system.
  4006. For years, this was a safe assumption, but containers change that: in
  4007. most (all?) container implementations, the application itself is running
  4008. as pid 1. This means that sending signals to those applications, will not
  4009. be handled by default. Results might be "your container not responding
  4010. when asking it to stop", or "ctrl-c not working even when the app is
  4011. running in the foreground inside a container".
  4012. So TL;DR: If we're running pid 1 (container), we should handle SIGTERM and SIGINT ourselves */
  4013. signal(SIGTERM,termIntHandler);
  4014. signal(SIGINT,termIntHandler);
  4015. }
  4016. signal(SIGUSR1,usr1Handler);
  4017. signal(SIGUSR2,usr2Handler);
  4018. signal(SIGPIPE,SIG_IGN);
  4019. checkOrFixFDS();
  4020. #ifdef HAVE_LIBSODIUM
  4021. if (sodium_init() == -1) {
  4022. g_log<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
  4023. exit(99);
  4024. }
  4025. #endif
  4026. openssl_thread_setup();
  4027. openssl_seed();
  4028. /* setup rng before chroot */
  4029. dns_random_init();
  4030. if(::arg()["server-id"].empty()) {
  4031. ::arg().set("server-id") = myHostname;
  4032. }
  4033. int newgid=0;
  4034. if(!::arg()["setgid"].empty())
  4035. newgid = strToGID(::arg()["setgid"]);
  4036. int newuid=0;
  4037. if(!::arg()["setuid"].empty())
  4038. newuid = strToUID(::arg()["setuid"]);
  4039. Utility::dropGroupPrivs(newuid, newgid);
  4040. if (!::arg()["chroot"].empty()) {
  4041. #ifdef HAVE_SYSTEMD
  4042. char *ns;
  4043. ns = getenv("NOTIFY_SOCKET");
  4044. if (ns != nullptr) {
  4045. g_log<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
  4046. exit(1);
  4047. }
  4048. #endif
  4049. if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
  4050. int err = errno;
  4051. g_log<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (err)<<", exiting"<<endl;
  4052. exit(1);
  4053. }
  4054. else
  4055. g_log<<Logger::Info<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
  4056. }
  4057. checkSocketDir();
  4058. s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
  4059. if(!s_pidfname.empty())
  4060. unlink(s_pidfname.c_str()); // remove possible old pid file
  4061. writePid();
  4062. makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
  4063. Utility::dropUserPrivs(newuid);
  4064. try {
  4065. /* we might still have capabilities remaining, for example if we have been started as root
  4066. without --setuid (please don't do that) or as an unprivileged user with ambient capabilities
  4067. like CAP_NET_BIND_SERVICE.
  4068. */
  4069. dropCapabilities();
  4070. }
  4071. catch(const std::exception& e) {
  4072. g_log<<Logger::Warning<<e.what()<<endl;
  4073. }
  4074. startLuaConfigDelayedThreads(delayedLuaThreads, g_luaconfs.getCopy().generation);
  4075. makeThreadPipes();
  4076. g_tcpTimeout=::arg().asNum("client-tcp-timeout");
  4077. g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
  4078. g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
  4079. s_maxUDPQueriesPerRound=::arg().asNum("max-udp-queries-per-round");
  4080. g_useKernelTimestamp = ::arg().mustDo("protobuf-use-kernel-timestamp");
  4081. blacklistStats(StatComponent::API, ::arg()["stats-api-blacklist"]);
  4082. blacklistStats(StatComponent::Carbon, ::arg()["stats-carbon-blacklist"]);
  4083. blacklistStats(StatComponent::RecControl, ::arg()["stats-rec-control-blacklist"]);
  4084. blacklistStats(StatComponent::SNMP, ::arg()["stats-snmp-blacklist"]);
  4085. if (::arg().mustDo("snmp-agent")) {
  4086. g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
  4087. g_snmpAgent->run();
  4088. }
  4089. int port = ::arg().asNum("udp-source-port-min");
  4090. if(port < 1024 || port > 65535){
  4091. g_log<<Logger::Error<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl;
  4092. exit(99); // this isn't going to fix itself either
  4093. }
  4094. s_minUdpSourcePort = port;
  4095. port = ::arg().asNum("udp-source-port-max");
  4096. if(port < 1024 || port > 65535 || port < s_minUdpSourcePort){
  4097. g_log<<Logger::Error<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl;
  4098. exit(99); // this isn't going to fix itself either
  4099. }
  4100. s_maxUdpSourcePort = port;
  4101. std::vector<string> parts {};
  4102. stringtok(parts, ::arg()["udp-source-port-avoid"], ", ");
  4103. for (const auto &part : parts)
  4104. {
  4105. port = std::stoi(part);
  4106. if(port < 1024 || port > 65535){
  4107. g_log<<Logger::Error<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part<<endl;
  4108. exit(99); // this isn't going to fix itself either
  4109. }
  4110. s_avoidUdpSourcePorts.insert(port);
  4111. }
  4112. unsigned int currentThreadId = 1;
  4113. const auto cpusMap = parseCPUMap();
  4114. if(g_numThreads == 1) {
  4115. g_log<<Logger::Warning<<"Operating unthreaded"<<endl;
  4116. #ifdef HAVE_SYSTEMD
  4117. sd_notify(0, "READY=1");
  4118. #endif
  4119. /* This thread handles the web server, carbon, statistics and the control channel */
  4120. auto& handlerInfos = s_threadInfos.at(0);
  4121. handlerInfos.isHandler = true;
  4122. handlerInfos.thread = std::thread(recursorThread, 0, "main");
  4123. setCPUMap(cpusMap, currentThreadId, pthread_self());
  4124. auto& infos = s_threadInfos.at(currentThreadId);
  4125. infos.isListener = true;
  4126. infos.isWorker = true;
  4127. recursorThread(currentThreadId++, "worker");
  4128. handlerInfos.thread.join();
  4129. if (handlerInfos.exitCode != 0) {
  4130. ret = handlerInfos.exitCode;
  4131. }
  4132. }
  4133. else {
  4134. if (g_weDistributeQueries) {
  4135. for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
  4136. auto& infos = s_threadInfos.at(currentThreadId + n);
  4137. infos.isListener = true;
  4138. }
  4139. }
  4140. for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
  4141. auto& infos = s_threadInfos.at(currentThreadId + (g_weDistributeQueries ? g_numDistributorThreads : 0) + n);
  4142. infos.isListener = !g_weDistributeQueries;
  4143. infos.isWorker = true;
  4144. }
  4145. if (g_weDistributeQueries) {
  4146. g_log<<Logger::Warning<<"Launching "<< g_numDistributorThreads <<" distributor threads"<<endl;
  4147. for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
  4148. auto& infos = s_threadInfos.at(currentThreadId);
  4149. infos.thread = std::thread(recursorThread, currentThreadId++, "distr");
  4150. setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
  4151. }
  4152. }
  4153. g_log<<Logger::Warning<<"Launching "<< g_numWorkerThreads <<" worker threads"<<endl;
  4154. for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
  4155. auto& infos = s_threadInfos.at(currentThreadId);
  4156. infos.thread = std::thread(recursorThread, currentThreadId++, "worker");
  4157. setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
  4158. }
  4159. #ifdef HAVE_SYSTEMD
  4160. sd_notify(0, "READY=1");
  4161. #endif
  4162. /* This thread handles the web server, carbon, statistics and the control channel */
  4163. auto& infos = s_threadInfos.at(0);
  4164. infos.isHandler = true;
  4165. infos.thread = std::thread(recursorThread, 0, "web+stat");
  4166. for (auto & ti : s_threadInfos) {
  4167. ti.thread.join();
  4168. if (ti.exitCode != 0) {
  4169. ret = ti.exitCode;
  4170. }
  4171. }
  4172. }
  4173. #ifdef HAVE_PROTOBUF
  4174. google::protobuf::ShutdownProtobufLibrary();
  4175. #endif /* HAVE_PROTOBUF */
  4176. return ret;
  4177. }
  4178. static void* recursorThread(unsigned int n, const string& threadName)
  4179. try
  4180. {
  4181. t_id=n;
  4182. auto& threadInfo = s_threadInfos.at(t_id);
  4183. static string threadPrefix = "pdns-r/";
  4184. setThreadName(threadPrefix + threadName);
  4185. SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
  4186. SyncRes::setDomainMap(g_initialDomainMap);
  4187. t_allowFrom = g_initialAllowFrom;
  4188. t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
  4189. t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
  4190. if (threadInfo.isHandler) {
  4191. if (!primeHints()) {
  4192. threadInfo.exitCode = EXIT_FAILURE;
  4193. RecursorControlChannel::stop = 1;
  4194. g_log<<Logger::Critical<<"Priming cache failed, stopping"<<endl;
  4195. return nullptr;
  4196. }
  4197. g_log<<Logger::Warning<<"Done priming cache with root hints"<<endl;
  4198. }
  4199. t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
  4200. #ifdef NOD_ENABLED
  4201. if (threadInfo.isWorker)
  4202. setupNODThread();
  4203. #endif /* NOD_ENABLED */
  4204. /* the listener threads handle TCP queries */
  4205. if(threadInfo.isWorker || threadInfo.isListener) {
  4206. try {
  4207. if(!::arg()["lua-dns-script"].empty()) {
  4208. t_pdl = std::make_shared<RecursorLua4>();
  4209. t_pdl->loadFile(::arg()["lua-dns-script"]);
  4210. g_log<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
  4211. }
  4212. }
  4213. catch(std::exception &e) {
  4214. g_log<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
  4215. _exit(99);
  4216. }
  4217. }
  4218. unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
  4219. if(ringsize) {
  4220. t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
  4221. if(g_weDistributeQueries)
  4222. t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads);
  4223. else
  4224. t_remotes->set_capacity(ringsize);
  4225. t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
  4226. t_servfailremotes->set_capacity(ringsize);
  4227. t_bogusremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
  4228. t_bogusremotes->set_capacity(ringsize);
  4229. t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
  4230. t_largeanswerremotes->set_capacity(ringsize);
  4231. t_timeouts = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
  4232. t_timeouts->set_capacity(ringsize);
  4233. t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
  4234. t_queryring->set_capacity(ringsize);
  4235. t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
  4236. t_servfailqueryring->set_capacity(ringsize);
  4237. t_bogusqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
  4238. t_bogusqueryring->set_capacity(ringsize);
  4239. }
  4240. MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
  4241. threadInfo.mt = MT.get();
  4242. #ifdef HAVE_PROTOBUF
  4243. /* start protobuf export threads if needed */
  4244. auto luaconfsLocal = g_luaconfs.getLocal();
  4245. checkProtobufExport(luaconfsLocal);
  4246. checkOutgoingProtobufExport(luaconfsLocal);
  4247. #endif /* HAVE_PROTOBUF */
  4248. #ifdef HAVE_FSTRM
  4249. checkFrameStreamExport(luaconfsLocal);
  4250. #endif
  4251. PacketID pident;
  4252. t_fdm=getMultiplexer();
  4253. RecursorWebServer *rws = nullptr;
  4254. if(threadInfo.isHandler) {
  4255. if(::arg().mustDo("webserver")) {
  4256. g_log<<Logger::Warning << "Enabling web server" << endl;
  4257. try {
  4258. rws = new RecursorWebServer(t_fdm);
  4259. }
  4260. catch(PDNSException &e) {
  4261. g_log<<Logger::Error<<"Exception: "<<e.reason<<endl;
  4262. exit(99);
  4263. }
  4264. }
  4265. g_log<<Logger::Info<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
  4266. }
  4267. else {
  4268. t_fdm->addReadFD(threadInfo.pipes.readToThread, handlePipeRequest);
  4269. t_fdm->addReadFD(threadInfo.pipes.readQueriesToThread, handlePipeRequest);
  4270. if (threadInfo.isListener) {
  4271. if (g_reusePort) {
  4272. /* then every listener has its own FDs */
  4273. for(const auto& deferred : threadInfo.deferredAdds) {
  4274. t_fdm->addReadFD(deferred.first, deferred.second);
  4275. }
  4276. }
  4277. else {
  4278. /* otherwise all listeners are listening on the same ones */
  4279. for(const auto& deferred : g_deferredAdds) {
  4280. t_fdm->addReadFD(deferred.first, deferred.second);
  4281. }
  4282. }
  4283. }
  4284. }
  4285. registerAllStats();
  4286. if(threadInfo.isHandler) {
  4287. t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
  4288. }
  4289. unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
  4290. bool listenOnTCP(true);
  4291. time_t last_stat = 0;
  4292. time_t last_carbon=0, last_lua_maintenance=0;
  4293. time_t carbonInterval=::arg().asNum("carbon-interval");
  4294. time_t luaMaintenanceInterval=::arg().asNum("lua-maintenance-interval");
  4295. counter.store(0); // used to periodically execute certain tasks
  4296. while (!RecursorControlChannel::stop) {
  4297. while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
  4298. // Use primes, it avoid not being scheduled in cases where the counter has a regular pattern.
  4299. // We want to call handler thread often, it gets scheduled about 2 times per second
  4300. if ((threadInfo.isHandler && counter % 11 == 0) || counter % 499 == 0) {
  4301. MT->makeThread(houseKeeping, 0);
  4302. }
  4303. if(!(counter%55)) {
  4304. typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
  4305. expired_t expired=t_fdm->getTimeouts(g_now);
  4306. for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
  4307. shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
  4308. if(g_logCommonErrors)
  4309. g_log<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toStringWithPort() <<endl;
  4310. t_fdm->removeReadFD(i->first);
  4311. }
  4312. }
  4313. counter++;
  4314. if(threadInfo.isHandler) {
  4315. if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
  4316. doStats();
  4317. last_stat = g_now.tv_sec;
  4318. }
  4319. Utility::gettimeofday(&g_now, 0);
  4320. if((g_now.tv_sec - last_carbon) >= carbonInterval) {
  4321. MT->makeThread(doCarbonDump, 0);
  4322. last_carbon = g_now.tv_sec;
  4323. }
  4324. }
  4325. if (t_pdl != nullptr) {
  4326. // lua-dns-script directive is present, call the maintenance callback if needed
  4327. /* remember that the listener threads handle TCP queries */
  4328. if (threadInfo.isWorker || threadInfo.isListener) {
  4329. // Only on threads processing queries
  4330. if(g_now.tv_sec - last_lua_maintenance >= luaMaintenanceInterval) {
  4331. t_pdl->maintenance();
  4332. last_lua_maintenance = g_now.tv_sec;
  4333. }
  4334. }
  4335. }
  4336. t_fdm->run(&g_now);
  4337. // 'run' updates g_now for us
  4338. if(threadInfo.isListener) {
  4339. if(listenOnTCP) {
  4340. if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
  4341. for(const auto fd : threadInfo.tcpSockets) {
  4342. t_fdm->removeReadFD(fd);
  4343. }
  4344. listenOnTCP=false;
  4345. }
  4346. }
  4347. else {
  4348. if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
  4349. for(const auto fd : threadInfo.tcpSockets) {
  4350. t_fdm->addReadFD(fd, handleNewTCPQuestion);
  4351. }
  4352. listenOnTCP=true;
  4353. }
  4354. }
  4355. }
  4356. }
  4357. delete rws;
  4358. delete t_fdm;
  4359. return 0;
  4360. }
  4361. catch(PDNSException &ae) {
  4362. g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
  4363. return 0;
  4364. }
  4365. catch(std::exception &e) {
  4366. g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
  4367. return 0;
  4368. }
  4369. catch(...) {
  4370. g_log<<Logger::Error<<"any other exception in main: "<<endl;
  4371. return 0;
  4372. }
  4373. int main(int argc, char **argv)
  4374. {
  4375. g_argc = argc;
  4376. g_argv = argv;
  4377. g_stats.startupTime=time(0);
  4378. Utility::srandom();
  4379. versionSetProduct(ProductRecursor);
  4380. reportBasicTypes();
  4381. reportOtherTypes();
  4382. int ret = EXIT_SUCCESS;
  4383. try {
  4384. ::arg().set("stack-size","stack size per mthread")="200000";
  4385. ::arg().set("soa-minimum-ttl","Don't change")="0";
  4386. ::arg().set("no-shuffle","Don't change")="off";
  4387. ::arg().set("local-port","port to listen on")="53";
  4388. ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
  4389. ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
  4390. ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
  4391. ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
  4392. ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
  4393. ::arg().set("signature-inception-skew", "Allow the signature inception to be off by this number of seconds")="60";
  4394. ::arg().set("daemon","Operate as a daemon")="no";
  4395. ::arg().setSwitch("write-pid","Write a PID file")="yes";
  4396. ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
  4397. ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
  4398. ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
  4399. ::arg().set("log-common-errors","If we should log rather common errors")="no";
  4400. ::arg().set("chroot","switch to chroot jail")="";
  4401. ::arg().set("setgid","If set, change group id to this gid for more security"
  4402. #ifdef HAVE_SYSTEMD
  4403. #define SYSTEMD_SETID_MSG ". When running inside systemd, use the User and Group settings in the unit-file!"
  4404. SYSTEMD_SETID_MSG
  4405. #endif
  4406. )="";
  4407. ::arg().set("setuid","If set, change user id to this uid for more security"
  4408. #ifdef HAVE_SYSTEMD
  4409. SYSTEMD_SETID_MSG
  4410. #endif
  4411. )="";
  4412. ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
  4413. ::arg().set("threads", "Launch this number of threads")="2";
  4414. ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
  4415. ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
  4416. ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
  4417. ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
  4418. ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
  4419. ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
  4420. ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
  4421. ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
  4422. ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
  4423. ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
  4424. ::arg().set("webserver-loglevel", "Amount of logging in the webserver (none, normal, detailed)") = "normal";
  4425. ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
  4426. ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
  4427. ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
  4428. ::arg().set("carbon-namespace", "If set overwrites the first part of the carbon string")="pdns";
  4429. ::arg().set("carbon-instance", "If set overwrites the the instance name default")="recursor";
  4430. ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
  4431. ::arg().set("quiet","Suppress logging of questions and answers")="";
  4432. ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
  4433. ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
  4434. ::arg().set("socket-owner","Owner of socket")="";
  4435. ::arg().set("socket-group","Group of socket")="";
  4436. ::arg().set("socket-mode", "Permissions for socket")="";
  4437. ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+"/pdns-recursor when unset and not chrooted"
  4438. #ifdef HAVE_SYSTEMD
  4439. + ". Set to the RUNTIME_DIRECTORY environment variable when that variable has a value (e.g. under systemd).")="";
  4440. auto runtimeDir = getenv("RUNTIME_DIRECTORY");
  4441. if (runtimeDir != nullptr) {
  4442. ::arg().set("socket-dir") = runtimeDir;
  4443. }
  4444. #else
  4445. )="";
  4446. #endif
  4447. ::arg().set("delegation-only","Which domains we only accept delegations from")="";
  4448. ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
  4449. ::arg().set("query-local-address6","DEPRECATED: Use query-local-address for IPv6 as well. Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
  4450. ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
  4451. ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
  4452. ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
  4453. ::arg().set("max-concurrent-requests-per-tcp-connection", "Maximum number of requests handled concurrently per TCP connection") = "10";
  4454. ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
  4455. ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
  4456. ::arg().set("dont-throttle-names", "Do not throttle nameservers with this name or suffix")="";
  4457. ::arg().set("dont-throttle-netmasks", "Do not throttle nameservers with this IP netmask")="";
  4458. ::arg().set("hint-file", "If set, load root hints from this file")="";
  4459. ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
  4460. ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
  4461. ::arg().set("max-cache-bogus-ttl", "maximum number of seconds to keep a Bogus (positive or negative) cached entry in memory")="3600";
  4462. ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
  4463. ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
  4464. ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
  4465. ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
  4466. ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
  4467. ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
  4468. ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
  4469. ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
  4470. ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
  4471. ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
  4472. ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
  4473. ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
  4474. ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
  4475. ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
  4476. ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
  4477. ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
  4478. ::arg().set("lua-config-file", "More powerful configuration options")="";
  4479. ::arg().setSwitch("allow-trust-anchor-query", "Allow queries for trustanchor.server CH TXT and negativetrustanchor.server CH TXT")="no";
  4480. ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
  4481. ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
  4482. ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
  4483. ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
  4484. ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
  4485. ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
  4486. ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
  4487. ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
  4488. ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
  4489. ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
  4490. ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
  4491. ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
  4492. ::arg().set("ecs-ipv4-cache-bits", "Maximum number of bits of IPv4 mask to cache ECS response")="24";
  4493. ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
  4494. ::arg().set("ecs-ipv6-cache-bits", "Maximum number of bits of IPv6 mask to cache ECS response")="56";
  4495. ::arg().set("ecs-minimum-ttl-override", "Set under adverse conditions, a minimum TTL for records in ECS-specific answers")="0";
  4496. ::arg().set("ecs-cache-limit-ttl", "Minimum TTL to cache ECS response")="0";
  4497. ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
  4498. ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
  4499. ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
  4500. ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
  4501. ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
  4502. ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
  4503. ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
  4504. ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
  4505. ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
  4506. ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1232";
  4507. ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1232";
  4508. ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
  4509. ::arg().set("max-qperq", "Maximum outgoing queries per query")="60";
  4510. ::arg().set("max-ns-address-qperq", "Maximum outgoing NS address queries per query")="10";
  4511. ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
  4512. ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
  4513. ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
  4514. ::arg().set("protobuf-use-kernel-timestamp", "Compute the latency of queries in protobuf messages by using the timestamp set by the kernel when the query was received (when available)")="";
  4515. ::arg().set("distribution-pipe-buffer-size", "Size in bytes of the internal buffer of the pipe used by the distributor to pass incoming queries to a worker thread")="0";
  4516. ::arg().set("include-dir","Include *.conf files from this directory")="";
  4517. ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
  4518. ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
  4519. ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
  4520. ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
  4521. std::string defaultBlacklistedStats = "cache-bytes, packetcache-bytes, special-memory-usage";
  4522. for (size_t idx = 0; idx < 32; idx++) {
  4523. defaultBlacklistedStats += ", ecs-v4-response-bits-" + std::to_string(idx + 1);
  4524. }
  4525. for (size_t idx = 0; idx < 128; idx++) {
  4526. defaultBlacklistedStats += ", ecs-v6-response-bits-" + std::to_string(idx + 1);
  4527. }
  4528. ::arg().set("stats-api-blacklist", "List of statistics that are disabled when retrieving the complete list of statistics via the API")=defaultBlacklistedStats;
  4529. ::arg().set("stats-carbon-blacklist", "List of statistics that are prevented from being exported via Carbon")=defaultBlacklistedStats;
  4530. ::arg().set("stats-rec-control-blacklist", "List of statistics that are prevented from being exported via rec_control get-all")=defaultBlacklistedStats;
  4531. ::arg().set("stats-snmp-blacklist", "List of statistics that are prevented from being exported via SNMP")=defaultBlacklistedStats;
  4532. ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
  4533. ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
  4534. ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
  4535. ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
  4536. ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
  4537. ::arg().set("xpf-rr-code","XPF option code to use")="0";
  4538. ::arg().set("proxy-protocol-from", "A Proxy Protocol header is only allowed from these subnets")="";
  4539. ::arg().set("proxy-protocol-maximum-size", "The maximum size of a proxy protocol payload, including the TLV values")="512";
  4540. ::arg().set("dns64-prefix", "DNS64 prefix")="";
  4541. ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
  4542. ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
  4543. ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
  4544. ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
  4545. ::arg().set("public-suffix-list-file", "Path to the Public Suffix List file, if any")="";
  4546. ::arg().set("distribution-load-factor", "The load factor used when PowerDNS is distributing queries to worker threads")="0.0";
  4547. ::arg().setSwitch("qname-minimization", "Use Query Name Minimization")="yes";
  4548. ::arg().setSwitch("nothing-below-nxdomain", "When an NXDOMAIN exists in cache for a name with fewer labels than the qname, send NXDOMAIN without doing a lookup (see RFC 8020)")="dnssec";
  4549. ::arg().set("max-generate-steps", "Maximum number of $GENERATE steps when loading a zone from a file")="0";
  4550. ::arg().set("record-cache-shards", "Number of shards in the record cache")="1024";
  4551. #ifdef NOD_ENABLED
  4552. ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
  4553. ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
  4554. ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
  4555. ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR)+"/nod";
  4556. ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
  4557. ::arg().set("new-domain-db-size", "Size of the DB used to track new domains in terms of number of cells. Defaults to 67108864")="67108864";
  4558. ::arg().set("new-domain-pb-tag", "If protobuf is configured, the tag to use for messages containing newly observed domains. Defaults to 'pdns-nod'")="pdns-nod";
  4559. ::arg().set("unique-response-tracking", "Track unique responses (tuple of query name, type and RR).")="no";
  4560. ::arg().set("unique-response-log", "Log unique responses")="yes";
  4561. ::arg().set("unique-response-history-dir", "Persist unique response tracking data here to persist between restarts")=string(NODCACHEDIR)+"/udr";
  4562. ::arg().set("unique-response-db-size", "Size of the DB used to track unique responses in terms of number of cells. Defaults to 67108864")="67108864";
  4563. ::arg().set("unique-response-pb-tag", "If protobuf is configured, the tag to use for messages containing unique DNS responses. Defaults to 'pdns-udr'")="pdns-udr";
  4564. #endif /* NOD_ENABLED */
  4565. ::arg().setCmd("help","Provide a helpful message");
  4566. ::arg().setCmd("version","Print version string");
  4567. ::arg().setCmd("config","Output blank configuration");
  4568. ::arg().setDefaults();
  4569. g_log.toConsole(Logger::Info);
  4570. ::arg().laxParse(argc,argv); // do a lax parse
  4571. if(::arg().mustDo("version")) {
  4572. showProductVersion();
  4573. showBuildConfiguration();
  4574. exit(0);
  4575. }
  4576. string configname=::arg()["config-dir"]+"/recursor.conf";
  4577. if(::arg()["config-name"]!="") {
  4578. configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
  4579. s_programname+="-"+::arg()["config-name"];
  4580. }
  4581. cleanSlashes(configname);
  4582. if(!::arg().getCommands().empty()) {
  4583. cerr<<"Fatal: non-option";
  4584. if (::arg().getCommands().size() > 1) {
  4585. cerr<<"s";
  4586. }
  4587. cerr<<" (";
  4588. bool first = true;
  4589. for (const auto& c : ::arg().getCommands()) {
  4590. if (!first) {
  4591. cerr<<", ";
  4592. }
  4593. first = false;
  4594. cerr<<c;
  4595. }
  4596. cerr<<") on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl;
  4597. exit(99);
  4598. }
  4599. if(::arg().mustDo("config")) {
  4600. cout<<::arg().configstring(false, true);
  4601. exit(0);
  4602. }
  4603. if(!::arg().file(configname.c_str()))
  4604. g_log<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
  4605. ::arg().parse(argc,argv);
  4606. if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
  4607. g_log<<Logger::Error<<"Using chroot and enabling the API is not possible"<<endl;
  4608. exit(EXIT_FAILURE);
  4609. }
  4610. if (::arg()["socket-dir"].empty()) {
  4611. if (::arg()["chroot"].empty())
  4612. ::arg().set("socket-dir") = std::string(LOCALSTATEDIR) + "/pdns-recursor";
  4613. else
  4614. ::arg().set("socket-dir") = "/";
  4615. }
  4616. ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
  4617. if(::arg().asNum("threads")==1) {
  4618. if (::arg().mustDo("pdns-distributes-queries")) {
  4619. g_log<<Logger::Warning<<"Only one thread, no need to distribute queries ourselves"<<endl;
  4620. ::arg().set("pdns-distributes-queries")="no";
  4621. }
  4622. }
  4623. if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
  4624. g_log<<Logger::Warning<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl;
  4625. ::arg().set("distributor-threads")="1";
  4626. }
  4627. if (!::arg().mustDo("pdns-distributes-queries")) {
  4628. ::arg().set("distributor-threads")="0";
  4629. }
  4630. if(::arg().mustDo("help")) {
  4631. cout<<"syntax:"<<endl<<endl;
  4632. cout<<::arg().helpstring(::arg()["help"])<<endl;
  4633. exit(0);
  4634. }
  4635. s_RC = std::unique_ptr<MemRecursorCache>(new MemRecursorCache(::arg().asNum("record-cache-shards")));
  4636. Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
  4637. if (logUrgency < Logger::Error)
  4638. logUrgency = Logger::Error;
  4639. if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
  4640. logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
  4641. }
  4642. g_log.setLoglevel(logUrgency);
  4643. g_log.toConsole(logUrgency);
  4644. ret = serviceMain(argc, argv);
  4645. }
  4646. catch(PDNSException &ae) {
  4647. g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
  4648. ret=EXIT_FAILURE;
  4649. }
  4650. catch(std::exception &e) {
  4651. g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
  4652. ret=EXIT_FAILURE;
  4653. }
  4654. catch(...) {
  4655. g_log<<Logger::Error<<"any other exception in main: "<<endl;
  4656. ret=EXIT_FAILURE;
  4657. }
  4658. return ret;
  4659. }