From 4c052c35da0ae36b80c77eddba534f456028cc19 Mon Sep 17 00:00:00 2001 From: jxy_git Date: Fri, 21 Jun 2024 17:46:42 +0800 Subject: [PATCH] Backport: add TCPOFOQueue to default netstat metrics --- ...POFOQueue-to-default-netstat-metrics.patch | 83 +++++++++++++++++++ node_exporter.spec | 7 +- 2 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 backport-Add-TCPOFOQueue-to-default-netstat-metrics.patch diff --git a/backport-Add-TCPOFOQueue-to-default-netstat-metrics.patch b/backport-Add-TCPOFOQueue-to-default-netstat-metrics.patch new file mode 100644 index 0000000..96a2672 --- /dev/null +++ b/backport-Add-TCPOFOQueue-to-default-netstat-metrics.patch @@ -0,0 +1,83 @@ +From ef6bfd4d334b39f26f4f6addbdddf8d79cd47901 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Fran=C3=A7ois=20Rigault?= +Date: Sat, 2 Dec 2023 12:11:10 +0100 +Subject: [PATCH] Add TCPOFOQueue to default netstat metrics +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Adds a count for TCP packets received out of orders. This can be an +indication that there is packet loss on the way packets travel towards +this server. In that case, the sender will retransmit (and we can +already monitor the Tcp_RetransSegs there), but we have no way to +monitor the packet loss on the receiver side. When a packet is received +and the receiver detects previous one missing, it will increase the +TCPOFOQueue counter and reply with selective ACK to the sender, both +possible indications of packet loss. Confirmation of packet loss can be +achieved by taking packet captures, ignoring wireshark analysis, and +carefully looking at data being retransmitted based on the TCP seq. + +Just like RetransSegs, TCPOFOQueue should be interesting for any +deployment as a mean to detect packet loss, so here suggesting adding it +to the default list. 
+ +Signed-off-by: François Rigault +--- + collector/fixtures/e2e-64k-page-output.txt | 3 +++ + collector/fixtures/e2e-output.txt | 3 +++ + collector/fixtures/proc/net/netstat | 4 ++-- + collector/netstat_linux.go | 2 +- + 4 files changed, 9 insertions(+), 3 deletions(-) + +diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt +index 866db45f6f..72e509d20d 100644 +--- a/collector/fixtures/e2e-64k-page-output.txt ++++ b/collector/fixtures/e2e-64k-page-output.txt +@@ -2303,6 +2303,9 @@ node_netstat_TcpExt_SyncookiesRecv 0 + # HELP node_netstat_TcpExt_SyncookiesSent Statistic TcpExtSyncookiesSent. + # TYPE node_netstat_TcpExt_SyncookiesSent untyped + node_netstat_TcpExt_SyncookiesSent 0 ++# HELP node_netstat_TcpExt_TCPOFOQueue Statistic TcpExtTCPOFOQueue. ++# TYPE node_netstat_TcpExt_TCPOFOQueue untyped ++node_netstat_TcpExt_TCPOFOQueue 42 + # HELP node_netstat_TcpExt_TCPTimeouts Statistic TcpExtTCPTimeouts. + # TYPE node_netstat_TcpExt_TCPTimeouts untyped + node_netstat_TcpExt_TCPTimeouts 115 +diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt +index 0c5356d4fb..ef18c6c761 100644 +--- a/collector/fixtures/e2e-output.txt ++++ b/collector/fixtures/e2e-output.txt +@@ -2325,6 +2325,9 @@ node_netstat_TcpExt_SyncookiesRecv 0 + # HELP node_netstat_TcpExt_SyncookiesSent Statistic TcpExtSyncookiesSent. + # TYPE node_netstat_TcpExt_SyncookiesSent untyped + node_netstat_TcpExt_SyncookiesSent 0 ++# HELP node_netstat_TcpExt_TCPOFOQueue Statistic TcpExtTCPOFOQueue. ++# TYPE node_netstat_TcpExt_TCPOFOQueue untyped ++node_netstat_TcpExt_TCPOFOQueue 42 + # HELP node_netstat_TcpExt_TCPTimeouts Statistic TcpExtTCPTimeouts. 
+ # TYPE node_netstat_TcpExt_TCPTimeouts untyped + node_netstat_TcpExt_TCPTimeouts 115 +diff --git a/collector/fixtures/proc/net/netstat b/collector/fixtures/proc/net/netstat +index 811f623273..de88470bdd 100644 +--- a/collector/fixtures/proc/net/netstat ++++ b/collector/fixtures/proc/net/netstat +@@ -1,4 +1,4 @@ +-TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed EmbryonicRsts PruneCalled RcvPruned OfoPruned OutOfWindowIcmps LockDroppedIcmps ArpFilter TW TWRecycled TWKilled PAWSPassive PAWSActive PAWSEstab DelayedACKs DelayedACKLocked DelayedACKLost ListenOverflows ListenDrops TCPPrequeued TCPDirectCopyFromBacklog TCPDirectCopyFromPrequeue TCPPrequeueDropped TCPHPHits TCPHPHitsToUser TCPPureAcks TCPHPAcks TCPRenoRecovery TCPSackRecovery TCPSACKReneging TCPFACKReorder TCPSACKReorder TCPRenoReorder TCPTSReorder TCPFullUndo TCPPartialUndo TCPDSACKUndo TCPLossUndo TCPLoss TCPLostRetransmit TCPRenoFailures TCPSackFailures TCPLossFailures TCPFastRetrans TCPForwardRetrans TCPSlowStartRetrans TCPTimeouts TCPRenoRecoveryFail TCPSackRecoveryFail TCPSchedulerFailed TCPRcvCollapsed TCPDSACKOldSent TCPDSACKOfoSent TCPDSACKRecv TCPDSACKOfoRecv TCPAbortOnData TCPAbortOnClose TCPAbortOnMemory TCPAbortOnTimeout TCPAbortOnLinger TCPAbortFailed TCPMemoryPressures TCPSACKDiscard TCPDSACKIgnoredOld TCPDSACKIgnoredNoUndo TCPSpuriousRTOs TCPMD5NotFound TCPMD5Unexpected TCPSackShifted TCPSackMerged TCPSackShiftFallback TCPBacklogDrop TCPMinTTLDrop TCPDeferAcceptDrop IPReversePathFilter TCPTimeWaitOverflow TCPReqQFullDoCookies TCPReqQFullDrop TCPChallengeACK TCPSYNChallenge +-TcpExt: 0 0 2 0 0 0 0 0 0 0 388812 0 0 0 0 6 102471 17 9 0 0 80568 0 168808 0 4471289 26 1433940 3744565 0 1 0 0 0 0 0 0 0 0 48 0 0 0 1 0 1 0 1 115 0 0 0 0 9 0 5 0 41 4 0 0 0 0 0 0 0 1 0 0 0 0 2 5 0 0 0 0 0 0 0 2 2 ++TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed EmbryonicRsts PruneCalled RcvPruned OfoPruned OutOfWindowIcmps LockDroppedIcmps ArpFilter TW TWRecycled TWKilled PAWSPassive PAWSActive 
PAWSEstab DelayedACKs DelayedACKLocked DelayedACKLost ListenOverflows ListenDrops TCPPrequeued TCPDirectCopyFromBacklog TCPDirectCopyFromPrequeue TCPPrequeueDropped TCPHPHits TCPHPHitsToUser TCPPureAcks TCPHPAcks TCPRenoRecovery TCPSackRecovery TCPSACKReneging TCPFACKReorder TCPSACKReorder TCPRenoReorder TCPTSReorder TCPFullUndo TCPPartialUndo TCPDSACKUndo TCPLossUndo TCPLoss TCPLostRetransmit TCPRenoFailures TCPSackFailures TCPLossFailures TCPFastRetrans TCPForwardRetrans TCPSlowStartRetrans TCPTimeouts TCPRenoRecoveryFail TCPSackRecoveryFail TCPSchedulerFailed TCPRcvCollapsed TCPDSACKOldSent TCPDSACKOfoSent TCPDSACKRecv TCPDSACKOfoRecv TCPAbortOnData TCPAbortOnClose TCPAbortOnMemory TCPAbortOnTimeout TCPAbortOnLinger TCPAbortFailed TCPMemoryPressures TCPSACKDiscard TCPDSACKIgnoredOld TCPDSACKIgnoredNoUndo TCPSpuriousRTOs TCPMD5NotFound TCPMD5Unexpected TCPSackShifted TCPSackMerged TCPSackShiftFallback TCPBacklogDrop TCPMinTTLDrop TCPDeferAcceptDrop IPReversePathFilter TCPTimeWaitOverflow TCPReqQFullDoCookies TCPReqQFullDrop TCPChallengeACK TCPSYNChallenge TCPOFOQueue ++TcpExt: 0 0 2 0 0 0 0 0 0 0 388812 0 0 0 0 6 102471 17 9 0 0 80568 0 168808 0 4471289 26 1433940 3744565 0 1 0 0 0 0 0 0 0 0 48 0 0 0 1 0 1 0 1 115 0 0 0 0 9 0 5 0 41 4 0 0 0 0 0 0 0 1 0 0 0 0 2 5 0 0 0 0 0 0 0 2 2 42 + IpExt: InNoRoutes InTruncatedPkts InMcastPkts OutMcastPkts InBcastPkts OutBcastPkts InOctets OutOctets InMcastOctets OutMcastOctets InBcastOctets OutBcastOctets + IpExt: 0 0 0 0 0 0 6286396970 2786264347 0 0 0 0 +diff --git a/collector/netstat_linux.go b/collector/netstat_linux.go +index 9115ef57b0..8e4c9f4de8 100644 +--- a/collector/netstat_linux.go ++++ b/collector/netstat_linux.go +@@ -36,7 +36,7 @@ const ( + ) + + var ( +- netStatFields = kingpin.Flag("collector.netstat.fields", "Regexp of fields to return for netstat 
collector.").Default("^(.*_(InErrors|InErrs)|Ip_Forwarding|Ip(6|Ext)_(InOctets|OutOctets)|Icmp6?_(InMsgs|OutMsgs)|TcpExt_(Listen.*|Syncookies.*|TCPSynRetrans|TCPTimeouts)|Tcp_(ActiveOpens|InSegs|OutSegs|OutRsts|PassiveOpens|RetransSegs|CurrEstab)|Udp6?_(InDatagrams|OutDatagrams|NoPorts|RcvbufErrors|SndbufErrors))$").String() ++ netStatFields = kingpin.Flag("collector.netstat.fields", "Regexp of fields to return for netstat collector.").Default("^(.*_(InErrors|InErrs)|Ip_Forwarding|Ip(6|Ext)_(InOctets|OutOctets)|Icmp6?_(InMsgs|OutMsgs)|TcpExt_(Listen.*|Syncookies.*|TCPSynRetrans|TCPTimeouts|TCPOFOQueue)|Tcp_(ActiveOpens|InSegs|OutSegs|OutRsts|PassiveOpens|RetransSegs|CurrEstab)|Udp6?_(InDatagrams|OutDatagrams|NoPorts|RcvbufErrors|SndbufErrors))$").String() + ) + + type netStatCollector struct { diff --git a/node_exporter.spec b/node_exporter.spec index c2b5f20..c63a6de 100644 --- a/node_exporter.spec +++ b/node_exporter.spec @@ -2,7 +2,7 @@ Name: node_exporter Version: 1.7.0 -Release: 3 +Release: 4 Summary: Exporter for machine metrics License: ASL 2.0 URL: https://github.com/prometheus/node_exporter @@ -13,6 +13,7 @@ Source0: https://github.com/prometheus/node_exporter/archive/v%{version}. Source1: node_exporter-vendor.tar.gz Source2: node_exporter.service Source3: node_exporter.sysconfig +Patch0: backport-Add-TCPOFOQueue-to-default-netstat-metrics.patch BuildRequires: systemd BuildRequires: promu @@ -26,7 +27,6 @@ ExclusiveArch: x86_64 aarch64 riscv64 loongarch64 %description Prometheus exporter for hardware and OS metrics exposed by *NIX kernels, written in Go with pluggable metric collectors. 
- %prep %autosetup -p1 -n %{name}-%{version} tar -xzvf %{SOURCE1} @@ -63,6 +63,9 @@ getent passwd node_exporter > /dev/null || \ %config(noreplace)%{_sysconfdir}/sysconfig/node_exporter %changelog +* Fri Jun 21 2024 jiangxinyu - 1.7.0-4 +- Backport: add TCPOFOQueue to default netstat metrics + * Tue Apr 23 2024 jiangxinyu - 1.7.0-3 - Fix the problem of failure to start node_exporter service -- Gitee