Merge lp:~pandemicsyn/swift/recon-replication-cleanup into lp:~hudson-openstack/swift/trunk

Proposed by Florian Hines
Status: Merged
Approved by: David Goetz
Approved revision: 353
Merged at revision: 353
Proposed branch: lp:~pandemicsyn/swift/recon-replication-cleanup
Merge into: lp:~hudson-openstack/swift/trunk
Diff against target: 290 lines (+162/-59)
4 files modified
bin/swift-recon-cron (+64/-56)
swift/common/middleware/recon.py (+4/-1)
swift/common/utils.py (+75/-1)
swift/obj/replicator.py (+19/-1)
To merge this branch: bzr merge lp:~pandemicsyn/swift/recon-replication-cleanup
Reviewer Review Type Date Requested Status
David Goetz (community) Approve
John Dickinson Approve
Review via email: mp+73557@code.launchpad.net

Description of the change

obj replicator can now log replication stats for recon directly:

in object-server.conf:

[object-replicator]
vm_test_mode = yes
recon_enable = yes
recon_cache_path = /var/cache/swift

Also replaced the swift-recon bash cronjob with a friendlier, cleaner Python version that now only obtains async stats. Basic usage:

$ bin/swift-recon-cron
Usage: swift-recon-cron CONF_FILE

#CONF_FILE = path to your object-server.conf

$ bin/swift-recon-cron /etc/swift/object-server.conf

To post a comment you must log in.
352. By Florian Hines

pep8

Revision history for this message
John Dickinson (notmyname) wrote :

yay

review: Approve
Revision history for this message
David Goetz (david-goetz) wrote :

I haven't run this yet but here's some stuff just from looking at it:

instead of:
import simplejson

do:

try:
    import simplejson as json
except ImportError:
    import json

and then use json instead of simplejson in the code.
------------------

for this:

try:
    os.mkdir("/var/lock/swift-recon-object-cron")
except OSError as e:
    logger.critical("%s" % e)
    sys.exit(1)

maybe print the error to make it easier to debug

------------------

use utils.TRUE_VALUES here

self.recon_enable = conf.get(
    'recon_enable', 'no').lower() in ('yes', 'true', 'on', '1')

------------------

except Exception:
    self.logger.exception(_('Exception dumping recon cache'))

it may be useful to log the actual exception

review: Needs Fixing
353. By Florian Hines

simplejson import and exception/logging fixes

Revision history for this message
David Goetz (david-goetz) wrote :

looks good

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'bin/swift-recon-cron'
2--- bin/swift-recon-cron 2011-07-28 03:29:23 +0000
3+++ bin/swift-recon-cron 2011-09-01 18:47:51 +0000
4@@ -1,56 +1,64 @@
5-#!/bin/bash
6-
7-#ghetto temporary cronjob to pull some of the stats for swift-recon
8-#usage: swift-recon-cron /var/log/swift/storage.log
9-# run it as frequently as you like, will skip runs during periods
10-# of high async pendings when the find takes a while.
11-#todo: everything.
12-
13-SYSLOG_FACILITY="local2"
14-ASYNC_PATH="/srv/node/sd[a-z]/async_pending/"
15-RECON_CACHE_PATH="/var/cache/swift"
16-
17-LOCKFILE="/var/lock/swift-recon-object.lock"
18-if [ -e $LOCKFILE ]; then
19- echo "NOTICE - $0 lock present - cron jobs overlapping ?"
20- echo "$0 lock file present" | /usr/bin/logger -p $SYSLOG_FACILITY.err
21- exit 1
22-else
23- touch $LOCKFILE
24-fi
25-
26-
27-if [ -z "$1" ]; then
28- LOGFILE="/var/log/swift/storage.log"
29-else
30- LOGFILE=$1
31-fi
32-
33-if [ ! -r "$LOGFILE" ]; then
34- echo "$0: error $LOGFILE not readable" | /usr/bin/logger -p $SYSLOG_FACILITY.err
35- rm $LOCKFILE
36- exit 1
37-fi
38-
39-if [ ! -d "$RECON_CACHE_PATH" ]; then
40- mkdir $RECON_CACHE_PATH
41-fi
42-
43-TMPF=`/bin/mktemp`
44-
45-asyncs=$(find $ASYNC_PATH -type f 2> /dev/null| wc -l)
46-#asyncs=$(find /srv/[1-4]/node/sd[a-z]1/async_pending/ -type f 2> /dev/null| wc -l) #saio
47-objrep=$(grep "Object replication complete." $LOGFILE | tail -n 1 | awk '{print $9}' | sed -e 's/(//g')
48-objincoming=$(netstat -aln | egrep "tcp.*:6000.*:.*ESTABLISHED" -c)
49-#objtw=$(netstat -aln | egrep "tcp.*:6000.*:.*TIME_WAIT" -c)
50-
51-echo "{\"async_pending\":$asyncs, \"object_replication_time\":$objrep, \"object_established_conns\":$objincoming}" > $TMPF
52-
53-mv $TMPF $RECON_CACHE_PATH/object.recon
54-if [ $? -ne 0 ]; then
55- echo "$0: $TMPF rename failed" | /usr/bin/logger -p $SYSLOG_FACILITY.err
56- rm -f $TMPF $LOCKFILE
57- exit 1
58-fi
59-rm -f $TMPF $LOCKFILE
60-exit 0
61+#!/usr/bin/env python
62+"""
63+swift-recon-cron.py
64+"""
65+
66+import os
67+import sys
68+import optparse
69+from tempfile import NamedTemporaryFile
70+try:
71+ import simplejson as json
72+except ImportError:
73+ import json
74+from ConfigParser import ConfigParser
75+from swift.common.utils import get_logger, dump_recon_cache
76+
77+
78+def async_count(device_dir, logger):
79+ async_count = 0
80+ for i in os.listdir(device_dir):
81+ asyncdir = os.path.join(device_dir, i, "async_pending")
82+ if os.path.isdir(asyncdir):
83+ for entry in os.listdir(asyncdir):
84+ if os.path.isdir(os.path.join(asyncdir, entry)):
85+ async_hdir = os.path.join(asyncdir, entry)
86+ async_count += len(os.listdir(async_hdir))
87+ return async_count
88+
89+
90+def main():
91+ c = ConfigParser()
92+ try:
93+ conf_path = sys.argv[1]
94+ except Exception:
95+ print "Usage: %s CONF_FILE" % sys.argv[0].split('/')[-1]
96+ print "ex: swift-recon-cron /etc/swift/object-server.conf"
97+ sys.exit(1)
98+ if not c.read(conf_path):
99+ print "Unable to read config file %s" % conf_path
100+ sys.exit(1)
101+ conf = dict(c.items('filter:recon'))
102+ device_dir = conf.get('devices', '/srv/node')
103+ recon_cache_path = conf.get('recon_cache_path', '/var/cache/swift')
104+ cache_file = os.path.join(recon_cache_path, "object.recon")
105+ conf['log_name'] = conf.get('log_name', 'recon-cron')
106+ logger = get_logger(conf, log_route='recon-cron')
107+ try:
108+ os.mkdir("/var/lock/swift-recon-object-cron")
109+ except OSError as e:
110+ logger.critical(_(str(e)))
111+ print str(e)
112+ sys.exit(1)
113+ asyncs = async_count(device_dir, logger)
114+ try:
115+ dump_recon_cache('async_pending', asyncs, cache_file)
116+ except Exception:
117+ logger.exception(_('Exception dumping recon cache'))
118+ try:
119+ os.rmdir("/var/lock/swift-recon-object-cron")
120+ except Exception:
121+ logger.exception(_('Exception remove cronjob lock'))
122+
123+if __name__ == '__main__':
124+ main()
125
126=== modified file 'swift/common/middleware/recon.py'
127--- swift/common/middleware/recon.py 2011-08-14 15:49:15 +0000
128+++ swift/common/middleware/recon.py 2011-09-01 18:47:51 +0000
129@@ -17,7 +17,10 @@
130 from swift.common.utils import split_path, cache_from_env, get_logger
131 from swift.common.constraints import check_mount
132 from hashlib import md5
133-import simplejson as json
134+try:
135+ import simplejson as json
136+except ImportError:
137+ import json
138 import os
139
140
141
142=== modified file 'swift/common/utils.py'
143--- swift/common/utils.py 2011-08-15 21:09:11 +0000
144+++ swift/common/utils.py 2011-09-01 18:47:51 +0000
145@@ -33,7 +33,11 @@
146 from ConfigParser import ConfigParser, NoSectionError, NoOptionError, \
147 RawConfigParser
148 from optparse import OptionParser
149-from tempfile import mkstemp
150+from tempfile import mkstemp, NamedTemporaryFile
151+try:
152+ import simplejson as json
153+except ImportError:
154+ import json
155 import cPickle as pickle
156 import glob
157 from urlparse import urlparse as stdlib_urlparse, ParseResult
158@@ -634,6 +638,46 @@
159 os.close(fd)
160
161
162+@contextmanager
163+def lock_file(filename, timeout=10, append=False, unlink=True):
164+ """
165+ Context manager that acquires a lock on a file. This will block until
166+ the lock can be acquired, or the timeout time has expired (whichever occurs
167+ first).
168+
169+ :param filename: file to be locked
170+ :param timeout: timeout (in seconds)
171+ :param append: True if file should be opened in append mode
172+ :param unlink: True if the file should be unlinked at the end
173+ """
174+ flags = os.O_CREAT | os.O_RDWR
175+ if append:
176+ flags |= os.O_APPEND
177+ fd = os.open(filename, flags)
178+ try:
179+ with LockTimeout(timeout, filename):
180+ while True:
181+ try:
182+ fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
183+ break
184+ except IOError, err:
185+ if err.errno != errno.EAGAIN:
186+ raise
187+ sleep(0.01)
188+ mode = 'r+'
189+ if append:
190+ mode = 'a+'
191+ file_obj = os.fdopen(fd, mode)
192+ yield file_obj
193+ finally:
194+ try:
195+ file_obj.close()
196+ except UnboundLocalError:
197+ pass # may have not actually opened the file
198+ if unlink:
199+ os.unlink(filename)
200+
201+
202 def lock_parent_directory(filename, timeout=10):
203 """
204 Context manager that acquires a lock on the parent directory of the given
205@@ -1030,3 +1074,33 @@
206 if index == -1:
207 return '%d' % value
208 return '%d%si' % (round(value), suffixes[index])
209+
210+
211+def dump_recon_cache(cache_key, cache_value, cache_file, lock_timeout=2):
212+ """Update recon cache values
213+
214+ :param cache_key: key to update
215+ :param cache_value: value you want to set key too
216+ :param cache_file: cache file to update
217+ :param lock_timeout: timeout (in seconds)
218+ """
219+ with lock_file(cache_file, lock_timeout, unlink=False) as cf:
220+ cache_entry = {}
221+ try:
222+ existing_entry = cf.readline()
223+ if existing_entry:
224+ cache_entry = json.loads(existing_entry)
225+ except ValueError:
226+ #file doesn't have a valid entry, we'll recreate it
227+ pass
228+ cache_entry[cache_key] = cache_value
229+ try:
230+ with NamedTemporaryFile(delete=False) as tf:
231+ tf.write(json.dumps(cache_entry) + '\n')
232+ os.rename(tf.name, cache_file)
233+ finally:
234+ try:
235+ os.unlink(tf.name)
236+ except OSError, err:
237+ if err.errno != errno.ENOENT:
238+ raise
239
240=== modified file 'swift/obj/replicator.py'
241--- swift/obj/replicator.py 2011-08-02 17:46:17 +0000
242+++ swift/obj/replicator.py 2011-09-01 18:47:51 +0000
243@@ -32,7 +32,8 @@
244
245 from swift.common.ring import Ring
246 from swift.common.utils import whataremyips, unlink_older_than, lock_path, \
247- compute_eta, get_logger, write_pickle, renamer
248+ compute_eta, get_logger, write_pickle, renamer, dump_recon_cache, \
249+ TRUE_VALUES
250 from swift.common.bufferedhttp import http_connect
251 from swift.common.daemon import Daemon
252
253@@ -243,6 +244,11 @@
254 self.rsync_io_timeout = conf.get('rsync_io_timeout', '30')
255 self.http_timeout = int(conf.get('http_timeout', 60))
256 self.lockup_timeout = int(conf.get('lockup_timeout', 1800))
257+ self.recon_enable = conf.get(
258+ 'recon_enable', 'no').lower() in TRUE_VALUES
259+ self.recon_cache_path = conf.get(
260+ 'recon_cache_path', '/var/cache/swift')
261+ self.recon_object = os.path.join(self.recon_cache_path, "object.recon")
262
263 def _rsync(self, args):
264 """
265@@ -578,6 +584,12 @@
266 total = (time.time() - start) / 60
267 self.logger.info(
268 _("Object replication complete. (%.02f minutes)"), total)
269+ if self.recon_enable:
270+ try:
271+ dump_recon_cache('object_replication_time', total, \
272+ self.recon_object)
273+ except Exception:
274+ self.logger.exception(_('Exception dumping recon cache'))
275
276 def run_forever(self, *args, **kwargs):
277 self.logger.info(_("Starting object replicator in daemon mode."))
278@@ -590,6 +602,12 @@
279 total = (time.time() - start) / 60
280 self.logger.info(
281 _("Object replication complete. (%.02f minutes)"), total)
282+ if self.recon_enable:
283+ try:
284+ dump_recon_cache('object_replication_time', total, \
285+ self.recon_object)
286+ except Exception:
287+ self.logger.exception(_('Exception dumping recon cache'))
288 self.logger.debug(_('Replication sleeping for %s seconds.'),
289 self.run_pause)
290 sleep(self.run_pause)