Merge ~aieri/charm-nrpe:bug/1802887 into ~nrpe-charmers/charm-nrpe:master

Proposed by Andrea Ieri
Status: Merged
Approved by: Peter Sabaini
Approved revision: 7f22223d90fd7af5ef1b1318b6d538732fbd49a4
Merged at revision: d609bbe38148b8d30cf5e60843ec080dda36fd7b
Proposed branch: ~aieri/charm-nrpe:bug/1802887
Merge into: ~nrpe-charmers/charm-nrpe:master
Diff against target: 332 lines (+168/-23)
2 files modified
config.yaml (+3/-1)
files/plugins/check_mem.pl (+165/-22)
Reviewer | Review Type | Date Requested | Status
Peter Sabaini | | | Approve
Review via email: mp+358629@code.launchpad.net

Commit message

[check_mem] bump plugin version and turn on ignoring hugepages by default

To post a comment you must log in.
Revision history for this message
🤖 Canonical IS Merge Bot (canonical-is-mergebot) wrote :

This merge proposal is being monitored by mergebot. Change the status to Approved to merge.

Revision history for this message
🤖 Canonical IS Merge Bot (canonical-is-mergebot) wrote :

Unable to determine commit message from repository - please click "Set commit message" and enter the commit message manually.

Revision history for this message
Peter Sabaini (peter-sabaini) wrote :

lgtm, thanks!

review: Approve
Revision history for this message
🤖 Canonical IS Merge Bot (canonical-is-mergebot) wrote :

Change successfully merged at revision d609bbe38148b8d30cf5e60843ec080dda36fd7b

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/config.yaml b/config.yaml
2index 241f32e..1a967b5 100644
3--- a/config.yaml
4+++ b/config.yaml
5@@ -111,10 +111,12 @@ options:
6 .
7 Set to '' in order to disable this check.
8 mem:
9- default: "-C -u -w 85 -c 90"
10+ default: "-C -h -u -w 85 -c 90"
11 type: string
12 description: |
13 Check memory % used.
14+ By default, thresholds are applied to the non-hugepages portion of the
15+ memory.
16 .
17 Set to '' in order to disable this check.
18 lacp_bonds:
19diff --git a/files/plugins/check_mem.pl b/files/plugins/check_mem.pl
20index 37217de..9b4c997 100755
21--- a/files/plugins/check_mem.pl
22+++ b/files/plugins/check_mem.pl
23@@ -32,9 +32,9 @@ use Getopt::Std;
24 #TODO - Use an alarm
25
26 # Predefined exit codes for Nagios
27-use vars qw($opt_c $opt_f $opt_u $opt_w $opt_C $opt_v %exit_codes);
28-%exit_codes = ('UNKNOWN' ,-1,
29- 'OK' , 0,
30+use vars qw($opt_c $opt_f $opt_u $opt_w $opt_C $opt_v $opt_h %exit_codes);
31+%exit_codes = ('UNKNOWN' , 3,
32+ 'OK' , 0,
33 'WARNING' , 1,
34 'CRITICAL', 2,
35 );
36@@ -43,32 +43,50 @@ use vars qw($opt_c $opt_f $opt_u $opt_w $opt_C $opt_v %exit_codes);
37 init();
38
39 # Get the numbers:
40-my ($free_memory_kb,$used_memory_kb,$caches_kb) = get_memory_info();
41+my ($free_memory_kb,$used_memory_kb,$caches_kb,$hugepages_kb) = get_memory_info();
42 print "$free_memory_kb Free\n$used_memory_kb Used\n$caches_kb Cache\n" if ($opt_v);
43+print "$hugepages_kb Hugepages\n" if ($opt_v and $opt_h);
44
45 if ($opt_C) { #Do we count caches as free?
46 $used_memory_kb -= $caches_kb;
47 $free_memory_kb += $caches_kb;
48 }
49
50+if ($opt_h) {
51+ $used_memory_kb -= $hugepages_kb;
52+}
53+
54+print "$used_memory_kb Used (after Hugepages)\n" if ($opt_v);
55+
56 # Round to the nearest KB
57 $free_memory_kb = sprintf('%d',$free_memory_kb);
58 $used_memory_kb = sprintf('%d',$used_memory_kb);
59 $caches_kb = sprintf('%d',$caches_kb);
60
61 # Tell Nagios what we came up with
62-tell_nagios($used_memory_kb,$free_memory_kb,$caches_kb);
63+tell_nagios($used_memory_kb,$free_memory_kb,$caches_kb,$hugepages_kb);
64
65
66 sub tell_nagios {
67- my ($used,$free,$caches) = @_;
68-
69+ my ($used,$free,$caches,$hugepages) = @_;
70+
71 # Calculate Total Memory
72 my $total = $free + $used;
73 print "$total Total\n" if ($opt_v);
74
75- my $perfdata = "|TOTAL=${total}KB;;;; USED=${used}KB;;;; FREE=${free}KB;;;; CACHES=${caches}KB;;;;";
76-
77+ my $perf_warn;
78+ my $perf_crit;
79+ if ( $opt_u ) {
80+ $perf_warn = int(${total} * $opt_w / 100);
81+ $perf_crit = int(${total} * $opt_c / 100);
82+ } else {
83+ $perf_warn = int(${total} * ( 100 - $opt_w ) / 100);
84+ $perf_crit = int(${total} * ( 100 - $opt_c ) / 100);
85+ }
86+
87+ my $perfdata = "|TOTAL=${total}KB;;;; USED=${used}KB;${perf_warn};${perf_crit};; FREE=${free}KB;;;; CACHES=${caches}KB;;;;";
88+ $perfdata .= " HUGEPAGES=${hugepages}KB;;;;" if ($opt_h);
89+
90 if ($opt_f) {
91 my $percent = sprintf "%.1f", ($free / $total * 100);
92 if ($percent <= $opt_c) {
93@@ -104,13 +122,14 @@ sub usage() {
94 print " -f Check FREE memory\n";
95 print " -u Check USED memory\n";
96 print " -C Count OS caches as FREE memory\n";
97+ print " -h Remove hugepages from the total memory count\n";
98 print " -w PERCENT Percent free/used when to warn\n";
99 print " -c PERCENT Percent free/used when critical\n";
100 print "\nCopyright (C) 2000 Dan Larsson <dl\@tyfon.net>\n";
101 print "check_mem.pl comes with absolutely NO WARRANTY either implied or explicit\n";
102 print "This program is licensed under the terms of the\n";
103 print "MIT License (check source code for details)\n";
104- exit $exit_codes{'UNKNOWN'};
105+ exit $exit_codes{'UNKNOWN'};
106 }
107
108 sub get_memory_info {
109@@ -118,6 +137,9 @@ sub get_memory_info {
110 my $free_memory_kb = 0;
111 my $total_memory_kb = 0;
112 my $caches_kb = 0;
113+ my $hugepages_nr = 0;
114+ my $hugepages_size = 0;
115+ my $hugepages_kb = 0;
116
117 my $uname;
118 if ( -e '/usr/bin/uname') {
119@@ -146,8 +168,108 @@ sub get_memory_info {
120 elsif (/^(Buffers|Cached|SReclaimable):\s+(\d+) kB/) {
121 $caches_kb += $2;
122 }
123+ elsif (/^Shmem:\s+(\d+) kB/) {
124+ $caches_kb -= $1;
125+ }
126+ # These variables will most likely be overwritten once we look into
127+ # /sys/kernel/mm/hugepages, unless we are running on linux <2.6.27
128+ # and have to rely on them
129+ elsif (/^HugePages_Total:\s+(\d+)/) {
130+ $hugepages_nr = $1;
131+ }
132+ elsif (/^Hugepagesize:\s+(\d+) kB/) {
133+ $hugepages_size = $1;
134+ }
135 }
136+ $hugepages_kb = $hugepages_nr * $hugepages_size;
137 $used_memory_kb = $total_memory_kb - $free_memory_kb;
138+
139+ # Read hugepages info from the newer sysfs interface if available
140+ my $hugepages_sysfs_dir = '/sys/kernel/mm/hugepages';
141+ if ( -d $hugepages_sysfs_dir ) {
142+ # Reset what we read from /proc/meminfo
143+ $hugepages_kb = 0;
144+ opendir(my $dh, $hugepages_sysfs_dir)
145+ || die "Can't open $hugepages_sysfs_dir: $!";
146+ while (my $entry = readdir $dh) {
147+ if ($entry =~ /^hugepages-(\d+)kB/) {
148+ $hugepages_size = $1;
149+ my $hugepages_nr_file = "$hugepages_sysfs_dir/$entry/nr_hugepages";
150+ open(my $fh, '<', $hugepages_nr_file)
151+ || die "Can't open $hugepages_nr_file for reading: $!";
152+ $hugepages_nr = <$fh>;
153+ close($fh);
154+ $hugepages_kb += $hugepages_nr * $hugepages_size;
155+ }
156+ }
157+ closedir($dh);
158+ }
159+ }
160+ elsif ( $uname =~ /HP-UX/ ) {
161+ # HP-UX, thanks to Christoph Fürstaller
162+ my @meminfo = `/usr/bin/sudo /usr/local/bin/kmeminfo`;
163+ foreach (@meminfo) {
164+ chomp;
165+ if (/^Physical memory\s\s+=\s+(\d+)\s+(\d+.\d)g/) {
166+ $total_memory_kb = ($2 * 1024 * 1024);
167+ }
168+ elsif (/^Free memory\s\s+=\s+(\d+)\s+(\d+.\d)g/) {
169+ $free_memory_kb = ($2 * 1024 * 1024);
170+ }
171+ }
172+ $used_memory_kb = $total_memory_kb - $free_memory_kb;
173+ }
174+ elsif ( $uname =~ /FreeBSD/ ) {
175+ # The FreeBSD case. 2013-03-19 www.claudiokuenzler.com
176+ # free mem = Inactive*Page Size + Cache*Page Size + Free*Page Size
177+ my $pagesize = `sysctl vm.stats.vm.v_page_size`;
178+ $pagesize =~ s/[^0-9]//g;
179+ my $mem_inactive = 0;
180+ my $mem_cache = 0;
181+ my $mem_free = 0;
182+ my $mem_total = 0;
183+ my $free_memory = 0;
184+ my @meminfo = `/sbin/sysctl vm.stats.vm`;
185+ foreach (@meminfo) {
186+ chomp;
187+ if (/^vm.stats.vm.v_inactive_count:\s+(\d+)/) {
188+ $mem_inactive = ($1 * $pagesize);
189+ }
190+ elsif (/^vm.stats.vm.v_cache_count:\s+(\d+)/) {
191+ $mem_cache = ($1 * $pagesize);
192+ }
193+ elsif (/^vm.stats.vm.v_free_count:\s+(\d+)/) {
194+ $mem_free = ($1 * $pagesize);
195+ }
196+ elsif (/^vm.stats.vm.v_page_count:\s+(\d+)/) {
197+ $mem_total = ($1 * $pagesize);
198+ }
199+ }
200+ $free_memory = $mem_inactive + $mem_cache + $mem_free;
201+ $free_memory_kb = ( $free_memory / 1024);
202+ $total_memory_kb = ( $mem_total / 1024);
203+ $used_memory_kb = $total_memory_kb - $free_memory_kb;
204+ $caches_kb = ($mem_cache / 1024);
205+ }
206+ elsif ( $uname =~ /joyent/ ) {
207+ # The SmartOS case. 2014-01-10 www.claudiokuenzler.com
208+ # free mem = pagesfree * pagesize
209+ my $pagesize = `pagesize`;
210+ my $phys_pages = `kstat -p unix:0:system_pages:pagestotal | awk '{print \$NF}'`;
211+ my $free_pages = `kstat -p unix:0:system_pages:pagesfree | awk '{print \$NF}'`;
212+ my $arc_size = `kstat -p zfs:0:arcstats:size | awk '{print \$NF}'`;
213+ my $arc_size_kb = $arc_size / 1024;
214+
215+ print "Pagesize is $pagesize" if ($opt_v);
216+ print "Total pages is $phys_pages" if ($opt_v);
217+ print "Free pages is $free_pages" if ($opt_v);
218+ print "Arc size is $arc_size" if ($opt_v);
219+
220+ $caches_kb += $arc_size_kb;
221+
222+ $total_memory_kb = $phys_pages * $pagesize / 1024;
223+ $free_memory_kb = $free_pages * $pagesize / 1024;
224+ $used_memory_kb = $total_memory_kb - $free_memory_kb;
225 }
226 elsif ( $uname =~ /SunOS/ ) {
227 eval "use Sun::Solaris::Kstat";
228@@ -170,7 +292,7 @@ sub get_memory_info {
229 }
230 }
231 $used_memory_kb = $total_memory_kb - $free_memory_kb;
232-
233+
234 }
235 else { # We have kstat
236 my $kstat = Sun::Solaris::Kstat->new();
237@@ -180,21 +302,39 @@ sub get_memory_info {
238 # to me how to determine UFS's cache size. There's inode_cache,
239 # and maybe the physmem variable in the system_pages module??
240 # In the real world, it looks to be so small as not to really matter,
241- # so we don't grab it. If someone can give me code that does this,
242+ # so we don't grab it. If someone can give me code that does this,
243 # I'd be glad to put it in.
244 my $arc_size = (exists ${kstat}->{zfs} && ${kstat}->{zfs}->{0}->{arcstats}->{size}) ?
245- ${kstat}->{zfs}->{0}->{arcstats}->{size} / 1024
246+ ${kstat}->{zfs}->{0}->{arcstats}->{size} / 1024
247 : 0;
248 $caches_kb += $arc_size;
249 my $pagesize = `pagesize`;
250-
251+
252 $total_memory_kb = $phys_pages * $pagesize / 1024;
253 $free_memory_kb = $free_pages * $pagesize / 1024;
254 $used_memory_kb = $total_memory_kb - $free_memory_kb;
255 }
256 }
257+ elsif ( $uname =~ /Darwin/ ) {
258+ $total_memory_kb = (split(/ /,`/usr/sbin/sysctl hw.memsize`))[1]/1024;
259+ my $pagesize = (split(/ /,`/usr/sbin/sysctl hw.pagesize`))[1];
260+ $caches_kb = 0;
261+ my @vm_stat = `/usr/bin/vm_stat`;
262+ foreach (@vm_stat) {
263+ chomp;
264+ if (/^(Pages free):\s+(\d+)\.$/) {
265+ $free_memory_kb = $2*$pagesize/1024;
266+ }
267+ # 'caching' concept works different on MACH
268+ # this should be a reasonable approximation
269+ elsif (/^Pages (inactive|purgable):\s+(\d+).$/) {
270+ $caches_kb += $2*$pagesize/1024;
271+ }
272+ }
273+ $used_memory_kb = $total_memory_kb - $free_memory_kb;
274+ }
275 elsif ( $uname =~ /AIX/ ) {
276- my @meminfo = `/usr/bin/vmstat -v`;
277+ my @meminfo = `/usr/bin/vmstat -vh`;
278 foreach (@meminfo) {
279 chomp;
280 if (/^\s*([0-9.]+)\s+(.*)/) {
281@@ -208,6 +348,9 @@ sub get_memory_info {
282 if ($counter_name eq 'file pages') {
283 $caches_kb = $1*4;
284 }
285+ if ($counter_name eq 'Number of 4k page frames loaned') {
286+ $free_memory_kb += $1*4;
287+ }
288 }
289 }
290 $used_memory_kb = $total_memory_kb - $free_memory_kb;
291@@ -217,16 +360,16 @@ sub get_memory_info {
292 print "You can't report on $uname caches!\n";
293 exit $exit_codes{UNKNOWN};
294 }
295- my $command_line = `vmstat | tail -1 | awk '{print \$4,\$5}'`;
296- chomp $command_line;
297+ my $command_line = `vmstat | tail -1 | awk '{print \$4,\$5}'`;
298+ chomp $command_line;
299 my @memlist = split(/ /, $command_line);
300-
301+
302 # Define the calculating scalars
303 $used_memory_kb = $memlist[0]/1024;
304 $free_memory_kb = $memlist[1]/1024;
305 $total_memory_kb = $used_memory_kb + $free_memory_kb;
306 }
307- return ($free_memory_kb,$used_memory_kb,$caches_kb);
308+ return ($free_memory_kb,$used_memory_kb,$caches_kb,$hugepages_kb);
309 }
310
311 sub init {
312@@ -235,9 +378,9 @@ sub init {
313 &usage;
314 }
315 else {
316- getopts('c:fuCvw:');
317+ getopts('c:fuChvw:');
318 }
319-
320+
321 # Shortcircuit the switches
322 if (!$opt_w or $opt_w == 0 or !$opt_c or $opt_c == 0) {
323 print "*** You must define WARN and CRITICAL levels!\n";
324@@ -247,7 +390,7 @@ sub init {
325 print "*** You must select to monitor either USED or FREE memory!\n";
326 &usage;
327 }
328-
329+
330 # Check if levels are sane
331 if ($opt_w <= $opt_c and $opt_f) {
332 print "*** WARN level must not be less than CRITICAL when checking FREE memory!\n";

Subscribers

People subscribed via source and target branches