You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

439 lines
12 KiB

  1. /*
  2. * This file is part of PowerDNS or dnsdist.
  3. * Copyright -- PowerDNS.COM B.V. and its contributors
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of version 2 of the GNU General Public License as
  7. * published by the Free Software Foundation.
  8. *
  9. * In addition, for the avoidance of any doubt, permission is granted to
  10. * link this program with OpenSSL and to (re)distribute the binaries
  11. * produced as the result of such linking.
  12. *
  13. * This program is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License
  19. * along with this program; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  21. */
  22. #ifdef HAVE_CONFIG_H
  23. #include "config.h"
  24. #endif
  25. #include "iputils.hh"
  26. #include <sys/socket.h>
  27. /** these functions provide a very lightweight wrapper to the Berkeley sockets API. Errors -> exceptions! */
  28. static void RuntimeError(const boost::format& fmt)
  29. {
  30. throw runtime_error(fmt.str());
  31. }
  32. static void NetworkErr(const boost::format& fmt)
  33. {
  34. throw NetworkError(fmt.str());
  35. }
  36. int SSocket(int family, int type, int flags)
  37. {
  38. int ret = socket(family, type, flags);
  39. if(ret < 0)
  40. RuntimeError(boost::format("creating socket of type %d: %s") % family % stringerror());
  41. return ret;
  42. }
  43. int SConnect(int sockfd, const ComboAddress& remote)
  44. {
  45. int ret = connect(sockfd, reinterpret_cast<const struct sockaddr*>(&remote), remote.getSocklen());
  46. if(ret < 0) {
  47. int savederrno = errno;
  48. RuntimeError(boost::format("connecting socket to %s: %s") % remote.toStringWithPort() % strerror(savederrno));
  49. }
  50. return ret;
  51. }
  52. int SConnectWithTimeout(int sockfd, const ComboAddress& remote, int timeout)
  53. {
  54. int ret = connect(sockfd, reinterpret_cast<const struct sockaddr*>(&remote), remote.getSocklen());
  55. if(ret < 0) {
  56. int savederrno = errno;
  57. if (savederrno == EINPROGRESS) {
  58. if (timeout <= 0) {
  59. return savederrno;
  60. }
  61. /* we wait until the connection has been established */
  62. bool error = false;
  63. bool disconnected = false;
  64. int res = waitForRWData(sockfd, false, timeout, 0, &error, &disconnected);
  65. if (res == 1) {
  66. if (error) {
  67. savederrno = 0;
  68. socklen_t errlen = sizeof(savederrno);
  69. if (getsockopt(sockfd, SOL_SOCKET, SO_ERROR, (void *)&savederrno, &errlen) == 0) {
  70. NetworkErr(boost::format("connecting to %s failed: %s") % remote.toStringWithPort() % string(strerror(savederrno)));
  71. }
  72. else {
  73. NetworkErr(boost::format("connecting to %s failed") % remote.toStringWithPort());
  74. }
  75. }
  76. if (disconnected) {
  77. NetworkErr(boost::format("%s closed the connection") % remote.toStringWithPort());
  78. }
  79. return 0;
  80. }
  81. else if (res == 0) {
  82. NetworkErr(boost::format("timeout while connecting to %s") % remote.toStringWithPort());
  83. } else if (res < 0) {
  84. savederrno = errno;
  85. NetworkErr(boost::format("waiting to connect to %s: %s") % remote.toStringWithPort() % string(strerror(savederrno)));
  86. }
  87. }
  88. else {
  89. NetworkErr(boost::format("connecting to %s: %s") % remote.toStringWithPort() % string(strerror(savederrno)));
  90. }
  91. }
  92. return 0;
  93. }
  94. int SBind(int sockfd, const ComboAddress& local)
  95. {
  96. int ret = bind(sockfd, (struct sockaddr*)&local, local.getSocklen());
  97. if(ret < 0) {
  98. int savederrno = errno;
  99. RuntimeError(boost::format("binding socket to %s: %s") % local.toStringWithPort() % strerror(savederrno));
  100. }
  101. return ret;
  102. }
  103. int SAccept(int sockfd, ComboAddress& remote)
  104. {
  105. socklen_t remlen = remote.getSocklen();
  106. int ret = accept(sockfd, (struct sockaddr*)&remote, &remlen);
  107. if(ret < 0)
  108. RuntimeError(boost::format("accepting new connection on socket: %s") % stringerror());
  109. return ret;
  110. }
  111. int SListen(int sockfd, int limit)
  112. {
  113. int ret = listen(sockfd, limit);
  114. if(ret < 0)
  115. RuntimeError(boost::format("setting socket to listen: %s") % stringerror());
  116. return ret;
  117. }
  118. int SSetsockopt(int sockfd, int level, int opname, int value)
  119. {
  120. int ret = setsockopt(sockfd, level, opname, &value, sizeof(value));
  121. if(ret < 0)
  122. RuntimeError(boost::format("setsockopt for level %d and opname %d to %d failed: %s") % level % opname % value % stringerror());
  123. return ret;
  124. }
  125. void setSocketIgnorePMTU(int sockfd)
  126. {
  127. #if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
  128. #ifdef IP_PMTUDISC_OMIT
  129. /* Linux 3.15+ has IP_PMTUDISC_OMIT, which discards PMTU information to prevent
  130. poisoning, but still allows fragmentation if the packet size exceeds the
  131. outgoing interface MTU, which is good.
  132. */
  133. try {
  134. SSetsockopt(sockfd, IPPROTO_IP, IP_MTU_DISCOVER, IP_PMTUDISC_OMIT);
  135. return;
  136. }
  137. catch(const std::exception& e) {
  138. /* failed, let's try IP_PMTUDISC_DONT instead */
  139. }
  140. #endif /* IP_PMTUDISC_OMIT */
  141. /* IP_PMTUDISC_DONT disables Path MTU discovery */
  142. SSetsockopt(sockfd, IPPROTO_IP, IP_MTU_DISCOVER, IP_PMTUDISC_DONT);
  143. #endif /* defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) */
  144. }
  145. bool setReusePort(int sockfd)
  146. {
  147. #if defined(SO_REUSEPORT_LB)
  148. try {
  149. SSetsockopt(sockfd, SOL_SOCKET, SO_REUSEPORT_LB, 1);
  150. return true;
  151. }
  152. catch (const std::exception& e) {
  153. return false;
  154. }
  155. #elif defined(SO_REUSEPORT)
  156. try {
  157. SSetsockopt(sockfd, SOL_SOCKET, SO_REUSEPORT, 1);
  158. return true;
  159. }
  160. catch (const std::exception& e) {
  161. return false;
  162. }
  163. #endif
  164. return false;
  165. }
  166. bool HarvestTimestamp(struct msghdr* msgh, struct timeval* tv)
  167. {
  168. #ifdef SO_TIMESTAMP
  169. struct cmsghdr *cmsg;
  170. for (cmsg = CMSG_FIRSTHDR(msgh); cmsg != NULL; cmsg = CMSG_NXTHDR(msgh,cmsg)) {
  171. if ((cmsg->cmsg_level == SOL_SOCKET) && (cmsg->cmsg_type == SO_TIMESTAMP || cmsg->cmsg_type == SCM_TIMESTAMP) &&
  172. CMSG_LEN(sizeof(*tv)) == cmsg->cmsg_len) {
  173. memcpy(tv, CMSG_DATA(cmsg), sizeof(*tv));
  174. return true;
  175. }
  176. }
  177. #endif
  178. return false;
  179. }
  180. bool HarvestDestinationAddress(const struct msghdr* msgh, ComboAddress* destination)
  181. {
  182. destination->reset();
  183. #ifdef __NetBSD__
  184. struct cmsghdr* cmsg;
  185. #else
  186. const struct cmsghdr* cmsg;
  187. #endif
  188. for (cmsg = CMSG_FIRSTHDR(msgh); cmsg != NULL; cmsg = CMSG_NXTHDR(const_cast<struct msghdr*>(msgh), const_cast<struct cmsghdr*>(cmsg))) {
  189. #if defined(IP_PKTINFO)
  190. if ((cmsg->cmsg_level == IPPROTO_IP) && (cmsg->cmsg_type == IP_PKTINFO)) {
  191. struct in_pktinfo *i = (struct in_pktinfo *) CMSG_DATA(cmsg);
  192. destination->sin4.sin_addr = i->ipi_addr;
  193. destination->sin4.sin_family = AF_INET;
  194. return true;
  195. }
  196. #elif defined(IP_RECVDSTADDR)
  197. if ((cmsg->cmsg_level == IPPROTO_IP) && (cmsg->cmsg_type == IP_RECVDSTADDR)) {
  198. struct in_addr *i = (struct in_addr *) CMSG_DATA(cmsg);
  199. destination->sin4.sin_addr = *i;
  200. destination->sin4.sin_family = AF_INET;
  201. return true;
  202. }
  203. #endif
  204. if ((cmsg->cmsg_level == IPPROTO_IPV6) && (cmsg->cmsg_type == IPV6_PKTINFO)) {
  205. struct in6_pktinfo *i = (struct in6_pktinfo *) CMSG_DATA(cmsg);
  206. destination->sin6.sin6_addr = i->ipi6_addr;
  207. destination->sin4.sin_family = AF_INET6;
  208. return true;
  209. }
  210. }
  211. return false;
  212. }
  213. bool IsAnyAddress(const ComboAddress& addr)
  214. {
  215. if(addr.sin4.sin_family == AF_INET)
  216. return addr.sin4.sin_addr.s_addr == 0;
  217. else if(addr.sin4.sin_family == AF_INET6)
  218. return !memcmp(&addr.sin6.sin6_addr, &in6addr_any, sizeof(addr.sin6.sin6_addr));
  219. return false;
  220. }
  221. ssize_t sendfromto(int sock, const char* data, size_t len, int flags, const ComboAddress& from, const ComboAddress& to)
  222. {
  223. struct msghdr msgh;
  224. struct iovec iov;
  225. cmsgbuf_aligned cbuf;
  226. /* Set up iov and msgh structures. */
  227. memset(&msgh, 0, sizeof(struct msghdr));
  228. iov.iov_base = (void*)data;
  229. iov.iov_len = len;
  230. msgh.msg_iov = &iov;
  231. msgh.msg_iovlen = 1;
  232. msgh.msg_name = (struct sockaddr*)&to;
  233. msgh.msg_namelen = to.getSocklen();
  234. if(from.sin4.sin_family) {
  235. addCMsgSrcAddr(&msgh, &cbuf, &from, 0);
  236. }
  237. else {
  238. msgh.msg_control=NULL;
  239. }
  240. return sendmsg(sock, &msgh, flags);
  241. }
  242. // be careful: when using this for receive purposes, make sure addr->sin4.sin_family is set appropriately so getSocklen works!
  243. // be careful: when using this function for *send* purposes, be sure to set cbufsize to 0!
  244. // be careful: if you don't call addCMsgSrcAddr after fillMSGHdr, make sure to set msg_control to NULL
  245. void fillMSGHdr(struct msghdr* msgh, struct iovec* iov, cmsgbuf_aligned* cbuf, size_t cbufsize, char* data, size_t datalen, ComboAddress* addr)
  246. {
  247. iov->iov_base = data;
  248. iov->iov_len = datalen;
  249. memset(msgh, 0, sizeof(struct msghdr));
  250. msgh->msg_control = cbuf;
  251. msgh->msg_controllen = cbufsize;
  252. msgh->msg_name = addr;
  253. msgh->msg_namelen = addr->getSocklen();
  254. msgh->msg_iov = iov;
  255. msgh->msg_iovlen = 1;
  256. msgh->msg_flags = 0;
  257. }
  258. // warning: various parts of PowerDNS assume 'truncate' will never throw
  259. void ComboAddress::truncate(unsigned int bits) noexcept
  260. {
  261. uint8_t* start;
  262. int len=4;
  263. if(sin4.sin_family==AF_INET) {
  264. if(bits >= 32)
  265. return;
  266. start = (uint8_t*)&sin4.sin_addr.s_addr;
  267. len=4;
  268. }
  269. else {
  270. if(bits >= 128)
  271. return;
  272. start = (uint8_t*)&sin6.sin6_addr.s6_addr;
  273. len=16;
  274. }
  275. auto tozero= len*8 - bits; // if set to 22, this will clear 1 byte, as it should
  276. memset(start + len - tozero/8, 0, tozero/8); // blot out the whole bytes on the right
  277. auto bitsleft=tozero % 8; // 2 bits left to clear
  278. // a b c d, to truncate to 22 bits, we just zeroed 'd' and need to zero 2 bits from c
  279. // so and by '11111100', which is ~((1<<2)-1) = ~3
  280. uint8_t* place = start + len - 1 - tozero/8;
  281. *place &= (~((1<<bitsleft)-1));
  282. }
  283. size_t sendMsgWithOptions(int fd, const char* buffer, size_t len, const ComboAddress* dest, const ComboAddress* local, unsigned int localItf, int flags)
  284. {
  285. struct msghdr msgh;
  286. struct iovec iov;
  287. cmsgbuf_aligned cbuf;
  288. /* Set up iov and msgh structures. */
  289. memset(&msgh, 0, sizeof(struct msghdr));
  290. msgh.msg_control = nullptr;
  291. msgh.msg_controllen = 0;
  292. if (dest) {
  293. msgh.msg_name = reinterpret_cast<void*>(const_cast<ComboAddress*>(dest));
  294. msgh.msg_namelen = dest->getSocklen();
  295. }
  296. else {
  297. msgh.msg_name = nullptr;
  298. msgh.msg_namelen = 0;
  299. }
  300. msgh.msg_flags = 0;
  301. if (localItf != 0 && local) {
  302. addCMsgSrcAddr(&msgh, &cbuf, local, localItf);
  303. }
  304. iov.iov_base = reinterpret_cast<void*>(const_cast<char*>(buffer));
  305. iov.iov_len = len;
  306. msgh.msg_iov = &iov;
  307. msgh.msg_iovlen = 1;
  308. msgh.msg_flags = 0;
  309. size_t sent = 0;
  310. bool firstTry = true;
  311. do {
  312. #ifdef MSG_FASTOPEN
  313. if (flags & MSG_FASTOPEN && firstTry == false) {
  314. flags &= ~MSG_FASTOPEN;
  315. }
  316. #endif /* MSG_FASTOPEN */
  317. ssize_t res = sendmsg(fd, &msgh, flags);
  318. if (res > 0) {
  319. size_t written = static_cast<size_t>(res);
  320. sent += written;
  321. if (sent == len) {
  322. return sent;
  323. }
  324. /* partial write */
  325. firstTry = false;
  326. iov.iov_len -= written;
  327. iov.iov_base = reinterpret_cast<void*>(reinterpret_cast<char*>(iov.iov_base) + written);
  328. written = 0;
  329. }
  330. else if (res == 0) {
  331. return res;
  332. }
  333. else if (res == -1) {
  334. int err = errno;
  335. if (err == EINTR) {
  336. continue;
  337. }
  338. else if (err == EAGAIN || err == EWOULDBLOCK || err == EINPROGRESS || err == ENOTCONN) {
  339. /* EINPROGRESS might happen with non blocking socket,
  340. especially with TCP Fast Open */
  341. return sent;
  342. }
  343. else {
  344. unixDie("failed in sendMsgWithTimeout");
  345. }
  346. }
  347. }
  348. while (true);
  349. return 0;
  350. }
  351. template class NetmaskTree<bool>;
  352. /* requires a non-blocking socket.
  353. On Linux, we could use MSG_DONTWAIT on a blocking socket
  354. but this is not portable.
  355. */
  356. bool isTCPSocketUsable(int sock)
  357. {
  358. int err = 0;
  359. char buf = '\0';
  360. size_t buf_size = sizeof(buf);
  361. do {
  362. ssize_t got = recv(sock, &buf, buf_size, MSG_PEEK);
  363. if (got > 0) {
  364. /* socket is usable, some data is even waiting to be read */
  365. return true;
  366. }
  367. else if (got == 0) {
  368. /* other end has closed the socket */
  369. return false;
  370. }
  371. else {
  372. err = errno;
  373. if (err == EAGAIN || err == EWOULDBLOCK) {
  374. /* socket is usable, no data waiting */
  375. return true;
  376. }
  377. else {
  378. if (err != EINTR) {
  379. /* something is wrong, could be ECONNRESET,
  380. ENOTCONN, EPIPE, but anyway this socket is
  381. not usable. */
  382. return false;
  383. }
  384. }
  385. }
  386. } while (err == EINTR);
  387. return false;
  388. }