Merge lp:~pandemicsyn/swift/recon-replication-cleanup into lp:~hudson-openstack/swift/trunk

Proposed by Florian Hines
Status: Merged
Approved by: David Goetz
Approved revision: 353
Merged at revision: 353
Proposed branch: lp:~pandemicsyn/swift/recon-replication-cleanup
Merge into: lp:~hudson-openstack/swift/trunk
Diff against target: 290 lines (+162/-59)
4 files modified
bin/swift-recon-cron (+64/-56)
swift/common/middleware/recon.py (+4/-1)
swift/common/utils.py (+75/-1)
swift/obj/replicator.py (+19/-1)
To merge this branch: bzr merge lp:~pandemicsyn/swift/recon-replication-cleanup
Reviewer Review Type Date Requested Status
David Goetz (community) Approve
John Dickinson Approve
Review via email: mp+73557@code.launchpad.net

Description of the change

obj replicator can now log replication stats for recon directly:

in object-server.conf:

[object-replicator]
vm_test_mode = yes
recon_enable = yes
recon_cache_path = /var/cache/swift

Also replaced the swift-recon bash cronjob with a friendlier/cleaner Python version that now only obtains async stats. Basic usage:

$ bin/swift-recon-cron
Usage: swift-recon-cron CONF_FILE

#CONF_FILE = path to your object-server.conf

$ bin/swift-recon-cron /etc/swift/object-server.conf

To post a comment you must log in.
352. By Florian Hines

pep8

Revision history for this message
John Dickinson (notmyname) wrote :

yay

review: Approve
Revision history for this message
David Goetz (david-goetz) wrote :

I haven't run this yet but here's some stuff just from looking at it:

instead of:
import simplejson

do:

try:
    import simplejson as json
except ImportError:
    import json

and then use json instead of simplejson in the code.
------------------

for this:
try:
    os.mkdir("/var/lock/swift-recon-object-cron")
except OSError as e:
    logger.critical("%s" % e)
    sys.exit(1)

maybe print the error to make it easier to debug

------------------

use utils.TRUE_VALUES here

self.recon_enable = conf.get(
    'recon_enable', 'no').lower() in ('yes', 'true', 'on', '1')

------------------

except Exception:
    self.logger.exception(_('Exception dumping recon cache'))

it may be useful to log the actual exception

review: Needs Fixing
353. By Florian Hines

simplejson import and exception/logging fixes

Revision history for this message
David Goetz (david-goetz) wrote :

looks good

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'bin/swift-recon-cron'
--- bin/swift-recon-cron 2011-07-28 03:29:23 +0000
+++ bin/swift-recon-cron 2011-09-01 18:47:51 +0000
@@ -1,56 +1,64 @@
1#!/bin/bash1#!/usr/bin/env python
22"""
3#ghetto temporary cronjob to pull some of the stats for swift-recon3swift-recon-cron.py
4#usage: swift-recon-cron /var/log/swift/storage.log 4"""
5# run it as frequently as you like, will skip runs during periods5
6# of high async pendings when the find takes a while.6import os
7#todo: everything.7import sys
88import optparse
9SYSLOG_FACILITY="local2"9from tempfile import NamedTemporaryFile
10ASYNC_PATH="/srv/node/sd[a-z]/async_pending/"10try:
11RECON_CACHE_PATH="/var/cache/swift"11 import simplejson as json
1212except ImportError:
13LOCKFILE="/var/lock/swift-recon-object.lock"13 import json
14if [ -e $LOCKFILE ]; then14from ConfigParser import ConfigParser
15 echo "NOTICE - $0 lock present - cron jobs overlapping ?" 15from swift.common.utils import get_logger, dump_recon_cache
16 echo "$0 lock file present" | /usr/bin/logger -p $SYSLOG_FACILITY.err16
17 exit 117
18else18def async_count(device_dir, logger):
19 touch $LOCKFILE19 async_count = 0
20fi20 for i in os.listdir(device_dir):
2121 asyncdir = os.path.join(device_dir, i, "async_pending")
2222 if os.path.isdir(asyncdir):
23if [ -z "$1" ]; then23 for entry in os.listdir(asyncdir):
24 LOGFILE="/var/log/swift/storage.log"24 if os.path.isdir(os.path.join(asyncdir, entry)):
25else25 async_hdir = os.path.join(asyncdir, entry)
26 LOGFILE=$126 async_count += len(os.listdir(async_hdir))
27fi27 return async_count
2828
29if [ ! -r "$LOGFILE" ]; then29
30 echo "$0: error $LOGFILE not readable" | /usr/bin/logger -p $SYSLOG_FACILITY.err30def main():
31 rm $LOCKFILE31 c = ConfigParser()
32 exit 132 try:
33fi33 conf_path = sys.argv[1]
3434 except Exception:
35if [ ! -d "$RECON_CACHE_PATH" ]; then35 print "Usage: %s CONF_FILE" % sys.argv[0].split('/')[-1]
36 mkdir $RECON_CACHE_PATH36 print "ex: swift-recon-cron /etc/swift/object-server.conf"
37fi37 sys.exit(1)
3838 if not c.read(conf_path):
39TMPF=`/bin/mktemp`39 print "Unable to read config file %s" % conf_path
4040 sys.exit(1)
41asyncs=$(find $ASYNC_PATH -type f 2> /dev/null| wc -l)41 conf = dict(c.items('filter:recon'))
42#asyncs=$(find /srv/[1-4]/node/sd[a-z]1/async_pending/ -type f 2> /dev/null| wc -l) #saio42 device_dir = conf.get('devices', '/srv/node')
43objrep=$(grep "Object replication complete." $LOGFILE | tail -n 1 | awk '{print $9}' | sed -e 's/(//g')43 recon_cache_path = conf.get('recon_cache_path', '/var/cache/swift')
44objincoming=$(netstat -aln | egrep "tcp.*:6000.*:.*ESTABLISHED" -c)44 cache_file = os.path.join(recon_cache_path, "object.recon")
45#objtw=$(netstat -aln | egrep "tcp.*:6000.*:.*TIME_WAIT" -c)45 conf['log_name'] = conf.get('log_name', 'recon-cron')
4646 logger = get_logger(conf, log_route='recon-cron')
47echo "{\"async_pending\":$asyncs, \"object_replication_time\":$objrep, \"object_established_conns\":$objincoming}" > $TMPF47 try:
4848 os.mkdir("/var/lock/swift-recon-object-cron")
49mv $TMPF $RECON_CACHE_PATH/object.recon49 except OSError as e:
50if [ $? -ne 0 ]; then50 logger.critical(_(str(e)))
51 echo "$0: $TMPF rename failed" | /usr/bin/logger -p $SYSLOG_FACILITY.err51 print str(e)
52 rm -f $TMPF $LOCKFILE52 sys.exit(1)
53 exit 153 asyncs = async_count(device_dir, logger)
54fi54 try:
55rm -f $TMPF $LOCKFILE55 dump_recon_cache('async_pending', asyncs, cache_file)
56exit 056 except Exception:
57 logger.exception(_('Exception dumping recon cache'))
58 try:
59 os.rmdir("/var/lock/swift-recon-object-cron")
60 except Exception:
61 logger.exception(_('Exception remove cronjob lock'))
62
63if __name__ == '__main__':
64 main()
5765
=== modified file 'swift/common/middleware/recon.py'
--- swift/common/middleware/recon.py 2011-08-14 15:49:15 +0000
+++ swift/common/middleware/recon.py 2011-09-01 18:47:51 +0000
@@ -17,7 +17,10 @@
17from swift.common.utils import split_path, cache_from_env, get_logger17from swift.common.utils import split_path, cache_from_env, get_logger
18from swift.common.constraints import check_mount18from swift.common.constraints import check_mount
19from hashlib import md519from hashlib import md5
20import simplejson as json20try:
21 import simplejson as json
22except ImportError:
23 import json
21import os24import os
2225
2326
2427
=== modified file 'swift/common/utils.py'
--- swift/common/utils.py 2011-08-15 21:09:11 +0000
+++ swift/common/utils.py 2011-09-01 18:47:51 +0000
@@ -33,7 +33,11 @@
33from ConfigParser import ConfigParser, NoSectionError, NoOptionError, \33from ConfigParser import ConfigParser, NoSectionError, NoOptionError, \
34 RawConfigParser34 RawConfigParser
35from optparse import OptionParser35from optparse import OptionParser
36from tempfile import mkstemp36from tempfile import mkstemp, NamedTemporaryFile
37try:
38 import simplejson as json
39except ImportError:
40 import json
37import cPickle as pickle41import cPickle as pickle
38import glob42import glob
39from urlparse import urlparse as stdlib_urlparse, ParseResult43from urlparse import urlparse as stdlib_urlparse, ParseResult
@@ -634,6 +638,46 @@
634 os.close(fd)638 os.close(fd)
635639
636640
641@contextmanager
642def lock_file(filename, timeout=10, append=False, unlink=True):
643 """
644 Context manager that acquires a lock on a file. This will block until
645 the lock can be acquired, or the timeout time has expired (whichever occurs
646 first).
647
648 :param filename: file to be locked
649 :param timeout: timeout (in seconds)
650 :param append: True if file should be opened in append mode
651 :param unlink: True if the file should be unlinked at the end
652 """
653 flags = os.O_CREAT | os.O_RDWR
654 if append:
655 flags |= os.O_APPEND
656 fd = os.open(filename, flags)
657 try:
658 with LockTimeout(timeout, filename):
659 while True:
660 try:
661 fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
662 break
663 except IOError, err:
664 if err.errno != errno.EAGAIN:
665 raise
666 sleep(0.01)
667 mode = 'r+'
668 if append:
669 mode = 'a+'
670 file_obj = os.fdopen(fd, mode)
671 yield file_obj
672 finally:
673 try:
674 file_obj.close()
675 except UnboundLocalError:
676 pass # may have not actually opened the file
677 if unlink:
678 os.unlink(filename)
679
680
637def lock_parent_directory(filename, timeout=10):681def lock_parent_directory(filename, timeout=10):
638 """682 """
639 Context manager that acquires a lock on the parent directory of the given683 Context manager that acquires a lock on the parent directory of the given
@@ -1030,3 +1074,33 @@
1030 if index == -1:1074 if index == -1:
1031 return '%d' % value1075 return '%d' % value
1032 return '%d%si' % (round(value), suffixes[index])1076 return '%d%si' % (round(value), suffixes[index])
1077
1078
1079def dump_recon_cache(cache_key, cache_value, cache_file, lock_timeout=2):
1080 """Update recon cache values
1081
1082 :param cache_key: key to update
1083 :param cache_value: value you want to set key too
1084 :param cache_file: cache file to update
1085 :param lock_timeout: timeout (in seconds)
1086 """
1087 with lock_file(cache_file, lock_timeout, unlink=False) as cf:
1088 cache_entry = {}
1089 try:
1090 existing_entry = cf.readline()
1091 if existing_entry:
1092 cache_entry = json.loads(existing_entry)
1093 except ValueError:
1094 #file doesn't have a valid entry, we'll recreate it
1095 pass
1096 cache_entry[cache_key] = cache_value
1097 try:
1098 with NamedTemporaryFile(delete=False) as tf:
1099 tf.write(json.dumps(cache_entry) + '\n')
1100 os.rename(tf.name, cache_file)
1101 finally:
1102 try:
1103 os.unlink(tf.name)
1104 except OSError, err:
1105 if err.errno != errno.ENOENT:
1106 raise
10331107
=== modified file 'swift/obj/replicator.py'
--- swift/obj/replicator.py 2011-08-02 17:46:17 +0000
+++ swift/obj/replicator.py 2011-09-01 18:47:51 +0000
@@ -32,7 +32,8 @@
3232
33from swift.common.ring import Ring33from swift.common.ring import Ring
34from swift.common.utils import whataremyips, unlink_older_than, lock_path, \34from swift.common.utils import whataremyips, unlink_older_than, lock_path, \
35 compute_eta, get_logger, write_pickle, renamer35 compute_eta, get_logger, write_pickle, renamer, dump_recon_cache, \
36 TRUE_VALUES
36from swift.common.bufferedhttp import http_connect37from swift.common.bufferedhttp import http_connect
37from swift.common.daemon import Daemon38from swift.common.daemon import Daemon
3839
@@ -243,6 +244,11 @@
243 self.rsync_io_timeout = conf.get('rsync_io_timeout', '30')244 self.rsync_io_timeout = conf.get('rsync_io_timeout', '30')
244 self.http_timeout = int(conf.get('http_timeout', 60))245 self.http_timeout = int(conf.get('http_timeout', 60))
245 self.lockup_timeout = int(conf.get('lockup_timeout', 1800))246 self.lockup_timeout = int(conf.get('lockup_timeout', 1800))
247 self.recon_enable = conf.get(
248 'recon_enable', 'no').lower() in TRUE_VALUES
249 self.recon_cache_path = conf.get(
250 'recon_cache_path', '/var/cache/swift')
251 self.recon_object = os.path.join(self.recon_cache_path, "object.recon")
246252
247 def _rsync(self, args):253 def _rsync(self, args):
248 """254 """
@@ -578,6 +584,12 @@
578 total = (time.time() - start) / 60584 total = (time.time() - start) / 60
579 self.logger.info(585 self.logger.info(
580 _("Object replication complete. (%.02f minutes)"), total)586 _("Object replication complete. (%.02f minutes)"), total)
587 if self.recon_enable:
588 try:
589 dump_recon_cache('object_replication_time', total, \
590 self.recon_object)
591 except Exception:
592 self.logger.exception(_('Exception dumping recon cache'))
581593
582 def run_forever(self, *args, **kwargs):594 def run_forever(self, *args, **kwargs):
583 self.logger.info(_("Starting object replicator in daemon mode."))595 self.logger.info(_("Starting object replicator in daemon mode."))
@@ -590,6 +602,12 @@
590 total = (time.time() - start) / 60602 total = (time.time() - start) / 60
591 self.logger.info(603 self.logger.info(
592 _("Object replication complete. (%.02f minutes)"), total)604 _("Object replication complete. (%.02f minutes)"), total)
605 if self.recon_enable:
606 try:
607 dump_recon_cache('object_replication_time', total, \
608 self.recon_object)
609 except Exception:
610 self.logger.exception(_('Exception dumping recon cache'))
593 self.logger.debug(_('Replication sleeping for %s seconds.'),611 self.logger.debug(_('Replication sleeping for %s seconds.'),
594 self.run_pause)612 self.run_pause)
595 sleep(self.run_pause)613 sleep(self.run_pause)