You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

455 lines
20 KiB

  1. /*
  2. * This file is part of PowerDNS or dnsdist.
  3. * Copyright -- PowerDNS.COM B.V. and its contributors
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of version 2 of the GNU General Public License as
  7. * published by the Free Software Foundation.
  8. *
  9. * In addition, for the avoidance of any doubt, permission is granted to
  10. * link this program with OpenSSL and to (re)distribute the binaries
  11. * produced as the result of such linking.
  12. *
  13. * This program is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License
  19. * along with this program; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  21. */
  22. #pragma once
#include <inttypes.h>
#include <unistd.h>

#include <atomic>
#include <map>
#include <string>
#include <unordered_map>
#include <vector>
  29. // Metric types for Prometheus
  30. enum class PrometheusMetricType : int
  31. {
  32. counter = 1,
  33. gauge = 2
  34. };
  35. // Keeps additional information about metrics
  36. struct MetricDefinition
  37. {
  38. MetricDefinition(const PrometheusMetricType& prometheusType_, const std::string& description_)
  39. {
  40. prometheusType = prometheusType_;
  41. description = description_;
  42. }
  43. MetricDefinition() = default;
  44. // Metric description
  45. std::string description;
  46. // Metric type for Prometheus
  47. PrometheusMetricType prometheusType;
  48. };
  49. class MetricDefinitionStorage
  50. {
  51. public:
  52. // Return metric definition by name
  53. bool getMetricDetails(const std::string& metricName, MetricDefinition& metric)
  54. {
  55. auto metricDetailsIter = metrics.find(metricName);
  56. if (metricDetailsIter == metrics.end()) {
  57. return false;
  58. }
  59. metric = metricDetailsIter->second;
  60. return true;
  61. };
  62. // Return string representation of Prometheus metric type
  63. std::string getPrometheusStringMetricType(const PrometheusMetricType& metricType)
  64. {
  65. switch (metricType) {
  66. case PrometheusMetricType::counter:
  67. return "counter";
  68. break;
  69. case PrometheusMetricType::gauge:
  70. return "gauge";
  71. break;
  72. default:
  73. return "";
  74. break;
  75. }
  76. };
  77. private:
  78. // Description and types for prometheus output of stats
  79. std::map<std::string, MetricDefinition> metrics = {
  80. {"all-outqueries",
  81. MetricDefinition(PrometheusMetricType::counter,
  82. "Number of outgoing UDP queries since starting")},
  83. {"answers-slow",
  84. MetricDefinition(PrometheusMetricType::counter,
  85. "Number of queries answered after 1 second")},
  86. {"answers0-1",
  87. MetricDefinition(PrometheusMetricType::counter,
  88. "Number of queries answered within 1 millisecond")},
  89. {"answers1-10",
  90. MetricDefinition(PrometheusMetricType::counter,
  91. "Number of queries answered within 10 milliseconds")},
  92. {"answers10-100",
  93. MetricDefinition(PrometheusMetricType::counter,
  94. "Number of queries answered within 100 milliseconds")},
  95. {"answers100-1000",
  96. MetricDefinition(PrometheusMetricType::counter,
  97. "Number of queries answered within 1 second")},
  98. {"auth4-answers-slow",
  99. MetricDefinition(PrometheusMetricType::counter,
  100. "Number of queries answered by authoritatives over IPv4 after 1 second")},
  101. {"auth4-answers0-1",
  102. MetricDefinition(PrometheusMetricType::counter,
  103. "Number of queries answered by authoritatives over IPv4within 1 millisecond")},
  104. {"auth4-answers1-10",
  105. MetricDefinition(PrometheusMetricType::counter,
  106. "Number of queries answered by authoritatives over IPv4 within 10 milliseconds")},
  107. {"auth4-answers10-100",
  108. MetricDefinition(PrometheusMetricType::counter,
  109. "Number of queries answered by authoritatives over IPv4 within 100 milliseconds")},
  110. {"auth4-answers100-1000",
  111. MetricDefinition(PrometheusMetricType::counter,
  112. "Number of queries answered by authoritatives over IPv4 within 1 second")},
  113. {"auth6-answers-slow",
  114. MetricDefinition(PrometheusMetricType::counter,
  115. "Number of queries answered by authoritatives over IPv6 after 1 second")},
  116. {"auth6-answers0-1",
  117. MetricDefinition(PrometheusMetricType::counter,
  118. "Number of queries answered by authoritatives over IPv6 within 1 millisecond")},
  119. {"auth6-answers1-10",
  120. MetricDefinition(PrometheusMetricType::counter,
  121. "Number of queries answered by authoritatives over IPv6 within 10 milliseconds")},
  122. {"auth6-answers10-100",
  123. MetricDefinition(PrometheusMetricType::counter,
  124. "Number of queries answered by authoritatives over IPv6 within 100 milliseconds")},
  125. {"auth6-answers100-1000",
  126. MetricDefinition(PrometheusMetricType::counter,
  127. "Number of queries answered by authoritatives over IPv6 within 1 second")},
  128. {"auth-zone-queries",
  129. MetricDefinition(PrometheusMetricType::counter,
  130. "Number of queries to locally hosted authoritative zones (`setting-auth-zones`) since starting")},
  131. {"cache-bytes",
  132. MetricDefinition(PrometheusMetricType::gauge,
  133. "Size of the cache in bytes")},
  134. {"cache-entries",
  135. MetricDefinition(PrometheusMetricType::gauge,
  136. "Number of entries in the cache")},
  137. {"cache-hits",
  138. MetricDefinition(PrometheusMetricType::counter,
  139. "Number of of cache hits since starting, this does **not** include hits that got answered from the packet-cache")},
  140. {"cache-misses",
  141. MetricDefinition(PrometheusMetricType::counter,
  142. "Number of cache misses since starting")},
  143. {"case-mismatches",
  144. MetricDefinition(PrometheusMetricType::counter,
  145. "Number of mismatches in character case since starting")},
  146. {"chain-resends",
  147. MetricDefinition(PrometheusMetricType::counter,
  148. "Number of queries chained to existing outstanding")},
  149. {"client-parse-errors",
  150. MetricDefinition(PrometheusMetricType::counter,
  151. "Number of client packets that could not be parsed")},
  152. {"concurrent-queries",
  153. MetricDefinition(PrometheusMetricType::gauge,
  154. "Number of MThreads currently running")},
  155. {"cpu-msec-thread-0",
  156. MetricDefinition(PrometheusMetricType::counter,
  157. "Number of milliseconds spent in thread n")},
  158. {"dlg-only-drops",
  159. MetricDefinition(PrometheusMetricType::counter,
  160. "Number of records dropped because of `setting-delegation-only` setting")},
  161. {"dnssec-authentic-data-queries",
  162. MetricDefinition(PrometheusMetricType::counter,
  163. "Number of queries received with the AD bit set")},
  164. {"dnssec-check-disabled-queries",
  165. MetricDefinition(PrometheusMetricType::counter,
  166. "Number of queries received with the CD bit set")},
  167. {"dnssec-queries",
  168. MetricDefinition(PrometheusMetricType::counter,
  169. "Number of queries received with the DO bit set")},
  170. {"dnssec-result-bogus",
  171. MetricDefinition(PrometheusMetricType::counter,
  172. "Number of DNSSEC validations that had the Bogus state")},
  173. {"dnssec-result-indeterminate",
  174. MetricDefinition(PrometheusMetricType::counter,
  175. "Number of DNSSEC validations that had the Indeterminate state")},
  176. {"dnssec-result-insecure",
  177. MetricDefinition(PrometheusMetricType::counter,
  178. "Number of DNSSEC validations that had the Insecure state")},
  179. {"dnssec-result-nta",
  180. MetricDefinition(PrometheusMetricType::counter,
  181. "Number of DNSSEC validations that had the (negative trust anchor) state")},
  182. {"dnssec-result-secure",
  183. MetricDefinition(PrometheusMetricType::counter,
  184. "Number of DNSSEC validations that had the Secure state")},
  185. {"dnssec-validations",
  186. MetricDefinition(PrometheusMetricType::counter,
  187. "Number of DNSSEC validations performed")},
  188. {"dont-outqueries",
  189. MetricDefinition(PrometheusMetricType::counter,
  190. "Number of outgoing queries dropped because of `setting-dont-query` setting")},
  191. {"qname-min-fallback-success",
  192. MetricDefinition(PrometheusMetricType::counter,
  193. "Number of successful queries due to fallback mechanism within 'qname-minimization' setting")},
  194. {"ecs-queries",
  195. MetricDefinition(PrometheusMetricType::counter,
  196. "Number of outgoing queries adorned with an EDNS Client Subnet option")},
  197. {"ecs-responses",
  198. MetricDefinition(PrometheusMetricType::counter,
  199. "Number of responses received from authoritative servers with an EDNS Client Subnet option we used")},
  200. {"edns-ping-matches",
  201. MetricDefinition(PrometheusMetricType::counter,
  202. "Number of servers that sent a valid EDNS PING response")},
  203. {"edns-ping-mismatches",
  204. MetricDefinition(PrometheusMetricType::counter,
  205. "Number of servers that sent an invalid EDN PING response")},
  206. {"failed-host-entries",
  207. MetricDefinition(PrometheusMetricType::counter,
  208. "Number of servers that failed to resolve")},
  209. {"ignored-packets",
  210. MetricDefinition(PrometheusMetricType::counter,
  211. "Number of non-query packets received on server sockets that should only get query packets")},
  212. {"ipv6-outqueries",
  213. MetricDefinition(PrometheusMetricType::counter,
  214. "Number of outgoing queries over IPv6")},
  215. {"ipv6-questions",
  216. MetricDefinition(PrometheusMetricType::counter,
  217. "Number of end-user initiated queries with the RD bit set, received over IPv6 UDP")},
  218. {"malloc-bytes",
  219. MetricDefinition(PrometheusMetricType::counter,
  220. "Number of bytes allocated by the process (broken, always returns 0)")},
  221. {"max-cache-entries",
  222. MetricDefinition(PrometheusMetricType::gauge,
  223. "Currently configured maximum number of cache entries")},
  224. {"max-packetcache-entries",
  225. MetricDefinition(PrometheusMetricType::gauge,
  226. "Currently configured maximum number of packet cache entries")},
  227. {"max-mthread-stack",
  228. MetricDefinition(PrometheusMetricType::gauge,
  229. "Maximum amount of thread stack ever used")},
  230. {"negcache-entries",
  231. MetricDefinition(PrometheusMetricType::gauge,
  232. "Number of entries in the negative answer cache")},
  233. {"no-packet-error",
  234. MetricDefinition(PrometheusMetricType::counter,
  235. "Number of erroneous received packets")},
  236. {"noedns-outqueries",
  237. MetricDefinition(PrometheusMetricType::counter,
  238. "Number of queries sent out without EDNS")},
  239. {"noerror-answers",
  240. MetricDefinition(PrometheusMetricType::counter,
  241. "Number of NOERROR answers since starting")},
  242. {"noping-outqueries",
  243. MetricDefinition(PrometheusMetricType::counter,
  244. "Number of queries sent out without ENDS PING")},
  245. {"nsset-invalidations",
  246. MetricDefinition(PrometheusMetricType::counter,
  247. "Number of times an nsset was dropped because it no longer worked")},
  248. {"nsspeeds-entries",
  249. MetricDefinition(PrometheusMetricType::gauge,
  250. "Number of entries in the NS speeds map")},
  251. {"nxdomain-answers",
  252. MetricDefinition(PrometheusMetricType::counter,
  253. "Number of NXDOMAIN answers since starting")},
  254. {"outgoing-timeouts",
  255. MetricDefinition(PrometheusMetricType::counter,
  256. "Number of timeouts on outgoing UDP queries since starting")},
  257. {"outgoing4-timeouts",
  258. MetricDefinition(PrometheusMetricType::counter,
  259. "Number of timeouts on outgoing UDP IPv4 queries since starting")},
  260. {"outgoing6-timeouts",
  261. MetricDefinition(PrometheusMetricType::counter,
  262. "Number of timeouts on outgoing UDP IPv6 queries since starting")},
  263. {"over-capacity-drops",
  264. MetricDefinition(PrometheusMetricType::counter,
  265. "Number of questions dropped because over maximum concurrent query limit")},
  266. {"packetcache-bytes",
  267. MetricDefinition(PrometheusMetricType::gauge,
  268. "Size of the packet cache in bytes")},
  269. {"packetcache-entries",
  270. MetricDefinition(PrometheusMetricType::gauge,
  271. "Number of packet cache entries")},
  272. {"packetcache-hits",
  273. MetricDefinition(PrometheusMetricType::counter,
  274. "Number of packet cache hits")},
  275. {"packetcache-misses",
  276. MetricDefinition(PrometheusMetricType::counter,
  277. "Number of packet cache misses")},
  278. {"policy-drops",
  279. MetricDefinition(PrometheusMetricType::counter,
  280. "Number of packets dropped because of (Lua) policy decision")},
  281. {"policy-result-noaction",
  282. MetricDefinition(PrometheusMetricType::counter,
  283. "Number of packets that were not acted upon by the RPZ/filter engine")},
  284. {"policy-result-drop",
  285. MetricDefinition(PrometheusMetricType::counter,
  286. "Number of packets that were dropped by the RPZ/filter engine")},
  287. {"policy-result-nxdomain",
  288. MetricDefinition(PrometheusMetricType::counter,
  289. "Number of packets that were replied to with NXDOMAIN by the RPZ/filter engine")},
  290. {"policy-result-nodata",
  291. MetricDefinition(PrometheusMetricType::counter,
  292. "Number of packets that were replied to with no data by the RPZ/filter engine")},
  293. {"policy-result-truncate",
  294. MetricDefinition(PrometheusMetricType::counter,
  295. "Number of packets that were were forced to TCP by the RPZ/filter engine")},
  296. {"policy-result-custom",
  297. MetricDefinition(PrometheusMetricType::counter,
  298. "Number of packets that were sent a custom answer by the RPZ/filter engine")},
  299. {"qa-latency",
  300. MetricDefinition(PrometheusMetricType::gauge,
  301. "Shows the current latency average, in microseconds, exponentially weighted over past 'latency-statistic-size' packets")},
  302. {"query-pipe-full-drops",
  303. MetricDefinition(PrometheusMetricType::counter,
  304. "Number of questions dropped because the query distribution pipe was full")},
  305. {"questions",
  306. MetricDefinition(PrometheusMetricType::counter,
  307. "Counts all end-user initiated queries with the RD bit set")},
  308. {"rebalanced-queries",
  309. MetricDefinition(PrometheusMetricType::counter,
  310. "Number of queries balanced to a different worker thread because the first selected one was above the target load configured with 'distribution-load-factor'")},
  311. {"resource-limits",
  312. MetricDefinition(PrometheusMetricType::counter,
  313. "Number of queries that could not be performed because of resource limits")},
  314. {"security-status",
  315. MetricDefinition(PrometheusMetricType::gauge,
  316. "security status based on `securitypolling`")},
  317. {"server-parse-errors",
  318. MetricDefinition(PrometheusMetricType::counter,
  319. "Number of server replied packets that could not be parsed")},
  320. {"servfail-answers",
  321. MetricDefinition(PrometheusMetricType::counter,
  322. "Number of SERVFAIL answers since starting")},
  323. {"spoof-prevents",
  324. MetricDefinition(PrometheusMetricType::counter,
  325. "Number of times PowerDNS considered itself spoofed, and dropped the data")},
  326. {"sys-msec",
  327. MetricDefinition(PrometheusMetricType::counter,
  328. "Number of CPU milliseconds spent in 'system' mode")},
  329. {"tcp-client-overflow",
  330. MetricDefinition(PrometheusMetricType::counter,
  331. "Number of times an IP address was denied TCP access because it already had too many connections")},
  332. {"tcp-clients",
  333. MetricDefinition(PrometheusMetricType::gauge,
  334. "Number of currently active TCP/IP clients")},
  335. {"tcp-outqueries",
  336. MetricDefinition(PrometheusMetricType::counter,
  337. "Number of outgoing TCP queries since starting")},
  338. {"tcp-questions",
  339. MetricDefinition(PrometheusMetricType::counter,
  340. "Number of all incoming TCP queries since starting")},
  341. {"throttle-entries",
  342. MetricDefinition(PrometheusMetricType::gauge,
  343. "Number of of entries in the throttle map")},
  344. {"throttled-out",
  345. MetricDefinition(PrometheusMetricType::counter,
  346. "Number of throttled outgoing UDP queries since starting")},
  347. {"throttled-outqueries",
  348. MetricDefinition(PrometheusMetricType::counter,
  349. "Number of throttled outgoing UDP queries since starting")},
  350. {"too-old-drops",
  351. MetricDefinition(PrometheusMetricType::counter,
  352. "Number of questions dropped that were too old")},
  353. {"truncated-drops",
  354. MetricDefinition(PrometheusMetricType::counter,
  355. "Number of questions dropped because they were larger than 512 bytes")},
  356. {"empty-queries",
  357. MetricDefinition(PrometheusMetricType::counter,
  358. "Questions dropped because they had a QD count of 0")},
  359. {"unauthorized-tcp",
  360. MetricDefinition(PrometheusMetricType::counter,
  361. "Number of TCP questions denied because of allow-from restrictions")},
  362. {"unauthorized-udp",
  363. MetricDefinition(PrometheusMetricType::counter,
  364. "Number of UDP questions denied because of allow-from restrictions")},
  365. {"unexpected-packets",
  366. MetricDefinition(PrometheusMetricType::counter,
  367. "Number of answers from remote servers that were unexpected (might point to spoofing)")},
  368. {"unreachables",
  369. MetricDefinition(PrometheusMetricType::counter,
  370. "Number of times nameservers were unreachable since starting")},
  371. {"uptime",
  372. MetricDefinition(PrometheusMetricType::counter,
  373. "Number of seconds process has been running")},
  374. {"user-msec",
  375. MetricDefinition(PrometheusMetricType::counter,
  376. "Number of CPU milliseconds spent in 'user' mode")},
  377. {"variable-responses",
  378. MetricDefinition(PrometheusMetricType::counter,
  379. "Number of responses that were marked as 'variable'")},
  380. {"x-our-latency",
  381. MetricDefinition(PrometheusMetricType::counter,
  382. "How much time was spent within PowerDNS in microseconds")},
  383. {"x-ourtime0-1",
  384. MetricDefinition(PrometheusMetricType::counter,
  385. "Counts responses where between 0 and 1 milliseconds was spent within the Recursor")},
  386. {"x-ourtime1-2",
  387. MetricDefinition(PrometheusMetricType::counter,
  388. "Counts responses where between 1 and 2 milliseconds was spent within the Recursor")},
  389. {"x-ourtime2-4",
  390. MetricDefinition(PrometheusMetricType::counter,
  391. "Counts responses where between 2 and 4 milliseconds was spent within the Recursor")},
  392. {"x-ourtime4-8",
  393. MetricDefinition(PrometheusMetricType::counter,
  394. "Counts responses where between 4 and 8 milliseconds was spent within the Recursor")},
  395. {"x-ourtime8-16",
  396. MetricDefinition(PrometheusMetricType::counter,
  397. "Counts responses where between 8 and 16 milliseconds was spent within the Recursor")},
  398. {"x-ourtime16-32",
  399. MetricDefinition(PrometheusMetricType::counter,
  400. "Counts responses where between 16 and 32 milliseconds was spent within the Recursor")},
  401. {"x-ourtime-slow",
  402. MetricDefinition(PrometheusMetricType::counter,
  403. "Counts responses where more than 32 milliseconds was spent within the Recursor")},
  404. {"fd-usage",
  405. MetricDefinition(PrometheusMetricType::gauge,
  406. "Number of open file descriptors")},
  407. {"real-memory-usage",
  408. MetricDefinition(PrometheusMetricType::gauge,
  409. "Number of bytes real process memory usage")},
  410. {"udp-in-errors",
  411. MetricDefinition(PrometheusMetricType::counter,
  412. "From /proc/net/snmp InErrors")},
  413. {"udp-noport-errors",
  414. MetricDefinition(PrometheusMetricType::counter,
  415. "From /proc/net/snmp NoPorts")},
  416. {"udp-recvbuf-errors",
  417. MetricDefinition(PrometheusMetricType::counter,
  418. "From /proc/net/snmp RcvbufErrors")},
  419. {"udp-sndbuf-errors",
  420. MetricDefinition(PrometheusMetricType::counter,
  421. "From /proc/net/snmp SndbufErrors")},
  422. {"cpu-iowait",
  423. MetricDefinition(PrometheusMetricType::counter,
  424. "Time spent waiting for I/O to complete by the whole system, in units of USER_HZ")},
  425. {"cpu-steal",
  426. MetricDefinition(PrometheusMetricType::counter,
  427. "Stolen time, which is the time spent by the whole system in other operating systems when running in a virtualized environment, in units of USER_HZ")},
  428. };
  429. };
// Global MetricDefinitionStorage instance. This is only a declaration (extern);
// the definition is not part of this header.
extern MetricDefinitionStorage g_metricDefinitions;