Merge lp:~percona-toolkit-dev/percona-toolkit/pt-stalk-sleep-collect-option into lp:~percona-toolkit-dev/percona-toolkit/release-2.2.6

Proposed by Daniel Nichter
Status: Merged
Approved by: Daniel Nichter
Approved revision: 589
Merged at revision: 604
Proposed branch: lp:~percona-toolkit-dev/percona-toolkit/pt-stalk-sleep-collect-option
Merge into: lp:~percona-toolkit-dev/percona-toolkit/release-2.2.6
Diff against target: 186 lines (+34/-23)
3 files modified
bin/pt-stalk (+20/-11)
lib/bash/collect.sh (+13/-11)
t/pt-stalk/samples/config001.conf (+1/-1)
To merge this branch: bzr merge lp:~percona-toolkit-dev/percona-toolkit/pt-stalk-sleep-collect-option
Reviewer | Review Type | Date Requested | Status
Daniel Nichter | | | Approve
Ryan Lowe (community) | | | Approve
Review via email: mp+196952@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Ryan Lowe (ryan-a-lowe) wrote :

LGTM

review: Approve
589. By Daniel Nichter

Revert r587: *do* sync on clock ticks. Fix pt-stalk.t (127.1 no longer works?). Enhance --sleep-collect docs.

Revision history for this message
Daniel Nichter (daniel-nichter) wrote :

Passes tests, looks good too:

# ./pt-stalk --no-stalk --run-time 20 --sleep-collect 5 --iterations 1
2013_12_14_04_26_10 Starting ./pt-stalk --function=status --variable=Threads_running --threshold=25 --match= --cycles=0 --interval=1 --iterations=1 --run-time=20 --sleep=300 --dest=/var/lib/pt-stalk --prefix= --notify-by-email= --log=/var/log/pt-stalk.log --pid=/var/run/pt-stalk.pid --plugin=
2013_12_14_04_26_10 Not stalking; collect triggered immediately
2013_12_14_04_26_10 Collect 1 triggered
2013_12_14_04_26_10 Collect 1 PID 16360
2013_12_14_04_26_10 Collect 1 done
2013_12_14_04_26_10 Waiting up to 60 seconds for subprocesses to finish...
2013_12_14_04_26_31 Exiting because no more iterations
2013_12_14_04_26_31 ./pt-stalk exit status 0

# clear;grep TS *-df
TS 1386995175.002241129 2013-12-14 04:26:15
TS 1386995180.002265762 2013-12-14 04:26:20
TS 1386995185.002345252 2013-12-14 04:26:25
TS 1386995190.002934961 2013-12-14 04:26:30

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'bin/pt-stalk'
2--- bin/pt-stalk 2013-10-31 18:23:02 +0000
3+++ bin/pt-stalk 2013-12-14 04:20:32 +0000
4@@ -862,20 +862,21 @@
5 if [ "$CMD_SYSCTL" ]; then
6 $CMD_SYSCTL -a >> "$d/$p-sysctl" &
7 fi
8+ local cnt=$(($OPT_RUN_TIME / $OPT_SLEEP_COLLECT))
9 if [ "$CMD_VMSTAT" ]; then
10- $CMD_VMSTAT 1 $OPT_RUN_TIME >> "$d/$p-vmstat" &
11+ $CMD_VMSTAT $OPT_SLEEP_COLLECT $cnt >> "$d/$p-vmstat" &
12 $CMD_VMSTAT $OPT_RUN_TIME 2 >> "$d/$p-vmstat-overall" &
13 fi
14 if [ "$CMD_IOSTAT" ]; then
15- $CMD_IOSTAT -dx 1 $OPT_RUN_TIME >> "$d/$p-iostat" &
16+ $CMD_IOSTAT -dx $OPT_SLEEP_COLLECT $cnt >> "$d/$p-iostat" &
17 $CMD_IOSTAT -dx $OPT_RUN_TIME 2 >> "$d/$p-iostat-overall" &
18 fi
19 if [ "$CMD_MPSTAT" ]; then
20- $CMD_MPSTAT -P ALL 1 $OPT_RUN_TIME >> "$d/$p-mpstat" &
21+ $CMD_MPSTAT -P ALL $OPT_SLEEP_COLLECT $cnt >> "$d/$p-mpstat" &
22 $CMD_MPSTAT -P ALL $OPT_RUN_TIME 1 >> "$d/$p-mpstat-overall" &
23 fi
24
25- $CMD_MYSQLADMIN $EXT_ARGV ext -i1 -c$OPT_RUN_TIME >>"$d/$p-mysqladmin" &
26+ $CMD_MYSQLADMIN $EXT_ARGV ext -i$OPT_SLEEP_COLLECT -c$cnt >>"$d/$p-mysqladmin" &
27 local mysqladmin_pid=$!
28
29 local have_lock_waits_table=""
30@@ -886,7 +887,10 @@
31 fi
32
33 log "Loop start: $(date +'TS %s.%N %F %T')"
34- for loopno in $(_seq $OPT_RUN_TIME); do
35+ local start_time=$(date +'%s')
36+ local curr_time=$start_time
37+ while [ $((curr_time - start_time)) -lt $OPT_RUN_TIME ]; do
38+
39 disk_space $d > $d/$p-disk-space
40 check_disk_space \
41 $d/$p-disk-space \
42@@ -894,10 +898,9 @@
43 "$OPT_DISK_PCT_FREE" \
44 || break
45
46- sleep $(date +%s.%N | awk '{print 1 - ($1 % 1)}')
47+ sleep $(date +'%s.%N' | awk "{print $OPT_SLEEP_COLLECT - (\$1 % $OPT_SLEEP_COLLECT)}")
48 local ts="$(date +"TS %s.%N %F %T")"
49
50-
51 if [ -d "/proc" ]; then
52 if [ -f "/proc/diskstats" ]; then
53 (echo $ts; cat /proc/diskstats) >> "$d/$p-diskstats" &
54@@ -918,19 +921,17 @@
55 (echo $ts; cat /proc/interrupts) >> "$d/$p-interrupts" &
56 fi
57 fi
58-
59 (echo $ts; df -k) >> "$d/$p-df" &
60-
61 (echo $ts; netstat -antp) >> "$d/$p-netstat" &
62 (echo $ts; netstat -s) >> "$d/$p-netstat_s" &
63-
64 (echo $ts; $CMD_MYSQL $EXT_ARGV -e "SHOW FULL PROCESSLIST\G") \
65 >> "$d/$p-processlist" &
66-
67 if [ "$have_lock_waits_table" ]; then
68 (echo $ts; lock_waits) >>"$d/$p-lock-waits" &
69 (echo $ts; transactions) >>"$d/$p-transactions" &
70 fi
71+
72+ curr_time=$(date +'%s')
73 done
74 log "Loop end: $(date +'TS %s.%N %F %T')"
75
76@@ -1995,6 +1996,14 @@
77 It also prevents filling up the disk or gathering too much data to analyze
78 reasonably.
79
80+=item --sleep-collect
81+
82+type: int; default: 1
83+
84+How long to sleep between collection loop cycles. This is useful with
85+C<--no-stalk> to do long collections. For example, to collect data every
86+minute for an hour, specify: C<--no-stalk --run-time 3600 --sleep-collect 60>.
87+
88 =item --socket
89
90 short form: -S; type: string
91
92=== modified file 'lib/bash/collect.sh'
93--- lib/bash/collect.sh 2013-03-12 21:23:02 +0000
94+++ lib/bash/collect.sh 2013-12-14 04:20:32 +0000
95@@ -146,16 +146,17 @@
96 if [ "$CMD_SYSCTL" ]; then
97 $CMD_SYSCTL -a >> "$d/$p-sysctl" &
98 fi
99+ local cnt=$(($OPT_RUN_TIME / $OPT_SLEEP_COLLECT))
100 if [ "$CMD_VMSTAT" ]; then
101- $CMD_VMSTAT 1 $OPT_RUN_TIME >> "$d/$p-vmstat" &
102+ $CMD_VMSTAT $OPT_SLEEP_COLLECT $cnt >> "$d/$p-vmstat" &
103 $CMD_VMSTAT $OPT_RUN_TIME 2 >> "$d/$p-vmstat-overall" &
104 fi
105 if [ "$CMD_IOSTAT" ]; then
106- $CMD_IOSTAT -dx 1 $OPT_RUN_TIME >> "$d/$p-iostat" &
107+ $CMD_IOSTAT -dx $OPT_SLEEP_COLLECT $cnt >> "$d/$p-iostat" &
108 $CMD_IOSTAT -dx $OPT_RUN_TIME 2 >> "$d/$p-iostat-overall" &
109 fi
110 if [ "$CMD_MPSTAT" ]; then
111- $CMD_MPSTAT -P ALL 1 $OPT_RUN_TIME >> "$d/$p-mpstat" &
112+ $CMD_MPSTAT -P ALL $OPT_SLEEP_COLLECT $cnt >> "$d/$p-mpstat" &
113 $CMD_MPSTAT -P ALL $OPT_RUN_TIME 1 >> "$d/$p-mpstat-overall" &
114 fi
115
116@@ -165,7 +166,7 @@
117 # get and keep a connection to the database; in troubled times
118 # the database tends to exceed max_connections, so reconnecting
119 # in the loop tends not to work very well.
120- $CMD_MYSQLADMIN $EXT_ARGV ext -i1 -c$OPT_RUN_TIME >>"$d/$p-mysqladmin" &
121+ $CMD_MYSQLADMIN $EXT_ARGV ext -i$OPT_SLEEP_COLLECT -c$cnt >>"$d/$p-mysqladmin" &
122 local mysqladmin_pid=$!
123
124 local have_lock_waits_table=""
125@@ -178,7 +179,10 @@
126 # This loop gathers data for the rest of the duration, and defines the time
127 # of the whole job.
128 log "Loop start: $(date +'TS %s.%N %F %T')"
129- for loopno in $(_seq $OPT_RUN_TIME); do
130+ local start_time=$(date +'%s')
131+ local curr_time=$start_time
132+ while [ $((curr_time - start_time)) -lt $OPT_RUN_TIME ]; do
133+
134 # We check the disk, but don't exit, because we need to stop jobs if we
135 # need to exit.
136 disk_space $d > $d/$p-disk-space
137@@ -188,14 +192,14 @@
138 "$OPT_DISK_PCT_FREE" \
139 || break
140
141+ # Sleep between collect cycles.
142 # Synchronize ourselves onto the clock tick, so the sleeps are 1-second
143- sleep $(date +%s.%N | awk '{print 1 - ($1 % 1)}')
144+ sleep $(date +'%s.%N' | awk "{print $OPT_SLEEP_COLLECT - (\$1 % $OPT_SLEEP_COLLECT)}")
145 local ts="$(date +"TS %s.%N %F %T")"
146
147 # #####################################################################
148 # Collect data for this cycle.
149 # #####################################################################
150-
151 if [ -d "/proc" ]; then
152 if [ -f "/proc/diskstats" ]; then
153 (echo $ts; cat /proc/diskstats) >> "$d/$p-diskstats" &
154@@ -216,19 +220,17 @@
155 (echo $ts; cat /proc/interrupts) >> "$d/$p-interrupts" &
156 fi
157 fi
158-
159 (echo $ts; df -k) >> "$d/$p-df" &
160-
161 (echo $ts; netstat -antp) >> "$d/$p-netstat" &
162 (echo $ts; netstat -s) >> "$d/$p-netstat_s" &
163-
164 (echo $ts; $CMD_MYSQL $EXT_ARGV -e "SHOW FULL PROCESSLIST\G") \
165 >> "$d/$p-processlist" &
166-
167 if [ "$have_lock_waits_table" ]; then
168 (echo $ts; lock_waits) >>"$d/$p-lock-waits" &
169 (echo $ts; transactions) >>"$d/$p-transactions" &
170 fi
171+
172+ curr_time=$(date +'%s')
173 done
174 log "Loop end: $(date +'TS %s.%N %F %T')"
175
176
177=== modified file 't/pt-stalk/samples/config001.conf'
178--- t/pt-stalk/samples/config001.conf 2012-01-24 18:50:48 +0000
179+++ t/pt-stalk/samples/config001.conf 2013-12-14 04:20:32 +0000
180@@ -4,5 +4,5 @@
181 --
182 -umsandbox
183 -pmsandbox
184---host 127.1
185+--host 127.0.0.1
186 --port 12345

Subscribers

People subscribed via source and target branches

to all changes: