diff -Nru mogilefs-server-2.44/CHANGES mogilefs-server-2.44/CHANGES --- mogilefs-server-2.44/CHANGES 2012-06-22 06:06:54.000000000 +0000 +++ mogilefs-server-2.44/CHANGES 2012-08-14 07:29:15.000000000 +0000 @@ -1,3 +1,28 @@ +2012-08-13: Release version 2.65 + + * Postgres advisory lock instead of table-based lock (Robin H. Johnson ) + Now requires minimum Postgres version of pg8.4. + + * reaper: switch to Danga::Socket for scheduling (Eric Wong ) + + * reaper: add queue_size_for_reaper server setting (Eric Wong ) + + * reaper: add "queue_rate_for_reaper" server setting (Eric Wong ) + defaults to 1000, same as previously. + + * reaper: global lock around DB interaction (Eric Wong ) + prevents reapers clobbering each other, causing a reduction in DB writes. + + * tests: add basic test for reaper (Eric Wong ) + + * fix tests when /etc/mogilefs/mogstored.conf exists (Eric Wong ) + + * iostat: increase flexibility of iostat parser (Eric Wong ) + + * iostat: allow MOG_IOSTAT_CMD env override (Eric Wong ) + + * When a mogstored is down, die with a more informative message. (Dave Lambley ) + 2012-06-21: Release version 2.64 * Delete memcache data when we replicate fids (Pyry Hakulinen ) diff -Nru mogilefs-server-2.44/debian/bzr-builder.manifest mogilefs-server-2.44/debian/bzr-builder.manifest --- mogilefs-server-2.44/debian/bzr-builder.manifest 2012-06-22 06:06:54.000000000 +0000 +++ mogilefs-server-2.44/debian/bzr-builder.manifest 2012-08-14 07:29:15.000000000 +0000 @@ -1,2 +1,2 @@ -# bzr-builder format 0.3 deb-version {debupstream}-0~1268 -lp:~guilhem-fr/mogilefs/MogileFS-Server revid:git-v1:23322a1ef17315f7ebbcb5e053d46a0c136af026 +# bzr-builder format 0.3 deb-version {debupstream}-0~1285 +lp:~guilhem-fr/mogilefs/MogileFS-Server revid:git-v1:fae96638ab8d7f385fc9ab14577ce4cbe7fc9835 diff -Nru mogilefs-server-2.44/debian/changelog mogilefs-server-2.44/debian/changelog --- mogilefs-server-2.44/debian/changelog 2012-06-22 06:06:54.000000000 +0000 +++ mogilefs-server-2.44/debian/changelog 2012-08-14 07:29:15.000000000 +0000 @@ -1,8 +1,8 @@ -mogilefs-server (2.44-0~1268~natty1) natty; urgency=low +mogilefs-server (2.44-0~1285~natty1) natty; urgency=low * Auto build. - -- Guilhem Lettron Fri, 22 Jun 2012 06:06:54 +0000 + -- Guilhem Lettron Tue, 14 Aug 2012 07:29:15 +0000 mogilefs-server (2.44-1) stable; urgency=low diff -Nru mogilefs-server-2.44/lib/MogileFS/Config.pm mogilefs-server-2.44/lib/MogileFS/Config.pm --- mogilefs-server-2.44/lib/MogileFS/Config.pm 2012-06-22 06:06:54.000000000 +0000 +++ mogilefs-server-2.44/lib/MogileFS/Config.pm 2012-08-14 07:29:15.000000000 +0000 @@ -365,7 +365,6 @@ # let slave settings go through unmodified, for now. if ($key =~ /^slave_/) { return $del_if_blank }; - if ($key eq "enable_rebalance") { return $bool }; if ($key eq "skip_devcount") { return $bool }; if ($key eq "skip_mkcol") { return $bool }; if ($key eq "case_sensitive_list_keys") { return $bool }; @@ -377,15 +376,6 @@ if ($key eq 'network_zones') { return $any }; if ($key =~ /^zone_/) { return $valid_netmask_list }; - if ($key eq "rebalance_policy") { return sub { - my $v = shift; - return undef unless $v; - # TODO: actually load the provided class and test if it loads? - die "Doesn't match acceptable format" unless - $v =~ /^[\w:\-]+$/; - return $v; - }} - # should probably restrict to (\d+) if ($key =~ /^queue_/) { return $any }; diff -Nru mogilefs-server-2.44/lib/MogileFS/Connection/Mogstored.pm mogilefs-server-2.44/lib/MogileFS/Connection/Mogstored.pm --- mogilefs-server-2.44/lib/MogileFS/Connection/Mogstored.pm 2012-06-22 06:06:54.000000000 +0000 +++ mogilefs-server-2.44/lib/MogileFS/Connection/Mogstored.pm 2012-08-14 07:29:15.000000000 +0000 @@ -18,7 +18,7 @@ return $self->{sock} if $self->{sock}; $self->{sock} = IO::Socket::INET->new(PeerAddr => $self->{ip}, PeerPort => $self->{port}, - Timeout => $timeout); + Timeout => $timeout) or die "Could not connect to mogstored on ".$self->{ip}.":".$self->{port}; $self->{sock}->sockopt(SO_KEEPALIVE, 1); return $self->{sock}; } diff -Nru mogilefs-server-2.44/lib/MogileFS/Server.pm mogilefs-server-2.44/lib/MogileFS/Server.pm --- mogilefs-server-2.44/lib/MogileFS/Server.pm 2012-06-22 06:06:54.000000000 +0000 +++ mogilefs-server-2.44/lib/MogileFS/Server.pm 2012-08-14 07:29:15.000000000 +0000 @@ -2,7 +2,7 @@ use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.64"; +$VERSION = "2.65"; =head1 NAME diff -Nru mogilefs-server-2.44/lib/MogileFS/Store/Postgres.pm mogilefs-server-2.44/lib/MogileFS/Store/Postgres.pm --- mogilefs-server-2.44/lib/MogileFS/Store/Postgres.pm 2012-06-22 06:06:54.000000000 +0000 +++ mogilefs-server-2.44/lib/MogileFS/Store/Postgres.pm 2012-08-14 07:29:15.000000000 +0000 @@ -62,7 +62,8 @@ $self->SUPER::init; my $database_version = $self->dbh->get_info(18); # SQL_DBMS_VER # We need >=pg-8.2 because we use SAVEPOINT and ROLLBACK TO. - die "Postgres is too old! Must use >=postgresql-8.2!" if($database_version =~ /\A0[0-7]\.|08\.0[01]/); + # We need >=pg-8.4 for working advisory locks + die "Postgres is too old! Must use >=postgresql-8.4!" if($database_version =~ /\A0[0-7]\.|08\.0[0123]/); $self->{lock_depth} = 0; } @@ -787,27 +788,30 @@ # returns 1 on success and 0 on timeout sub get_lock { my ($self, $lockname, $timeout) = @_; + my $hostid = lockid(hostname); my $lockid = lockid($lockname); - die "Lock recursion detected (grabbing $lockname ($lockid), had $self->{last_lock} (".lockid($self->{last_lock})."). Bailing out." if $self->{lock_depth}; + die sprintf("Lock recursion detected (grabbing %s on %s (%s/%s), had %s (%s). Bailing out.", $lockname, hostname, $hostid, $lockid, $self->{last_lock}, lockid($self->{last_lock})) if $self->{lock_depth}; debug("$$ Locking $lockname ($lockid)\n") if $Mgd::DEBUG >= 5; my $lock = undef; - while($timeout >= 0 and not defined($lock)) { - $lock = eval { $self->dbh->do('INSERT INTO lock (lockid,hostname,pid,acquiredat) VALUES (?, ?, ?, '.$self->unix_timestamp().')', undef, $lockid, hostname, $$) }; - if($self->was_duplicate_error) { - $timeout--; - sleep 1 if $timeout > 0; - next; - } + while($timeout >= 0) { + $lock = $self->dbh->selectrow_array("SELECT pg_try_advisory_lock(?, ?)", undef, $hostid, $lockid); $self->condthrow; - #$lock = $self->dbh->selectrow_array("SELECT pg_try_advisory_lock(?, ?)", undef, $lockid, $timeout); - #warn("$$ Lock result=$lock\n"); - if (defined $lock and $lock == 1) { - $self->{lock_depth} = 1; - $self->{last_lock} = $lockname; + if (defined $lock) { + if($lock == 1) { + $self->{lock_depth} = 1; + $self->{last_lock} = $lockname; + last; + } elsif($lock == 0) { + sleep 1 if $timeout > 0; + $timeout--; + next; + } else { + die "Something went horribly wrong while getting lock $lockname - unknown return value"; + } } else { - die "Something went horribly wrong while getting lock $lockname"; + die "Something went horribly wrong while getting lock $lockname - undefined lock"; } } return $lock; @@ -817,10 +821,10 @@ # returns 1 on success and 0 if no lock we have has that name. sub release_lock { my ($self, $lockname) = @_; + my $hostid = lockid(hostname); my $lockid = lockid($lockname); debug("$$ Unlocking $lockname ($lockid)\n") if $Mgd::DEBUG >= 5; - #my $rv = $self->dbh->selectrow_array("SELECT pg_advisory_unlock(?)", undef, $lockid); - my $rv = $self->dbh->do('DELETE FROM lock WHERE lockid=? AND pid=? AND hostname=?', undef, $lockid, $$, hostname); + my $rv = $self->dbh->selectrow_array("SELECT pg_advisory_unlock(?, ?)", undef, $hostid, $lockid); debug("Double-release of lock $lockname!") if $self->{lock_depth} != 0 and $rv == 0 and $Mgd::DEBUG >= 2; $self->condthrow; $self->{lock_depth} = 0; diff -Nru mogilefs-server-2.44/lib/MogileFS/Store.pm mogilefs-server-2.44/lib/MogileFS/Store.pm --- mogilefs-server-2.44/lib/MogileFS/Store.pm 2012-06-22 06:06:54.000000000 +0000 +++ mogilefs-server-2.44/lib/MogileFS/Store.pm 2012-08-14 07:29:15.000000000 +0000 @@ -2160,6 +2160,21 @@ $self->dbh->do("DELETE FROM checksum WHERE fid = ?", undef, $fidid); } +# setup the value used in a 'nexttry' field to indicate that this item will +# never actually be tried again and require some sort of manual intervention. +use constant ENDOFTIME => 2147483647; + +sub end_of_time { ENDOFTIME; } + +# returns the size of the non-urgent replication queue +# nexttry == 0 - the file is urgent +# nexttry != 0 && nexttry < ENDOFTIME - the file is deferred +sub deferred_repl_queue_length { + my ($self) = @_; + + return $self->dbh->selectrow_array('SELECT COUNT(*) FROM file_to_replicate WHERE nexttry != 0 AND nexttry < ?', undef, $self->end_of_time); +} + 1; __END__ diff -Nru mogilefs-server-2.44/lib/MogileFS/Test.pm mogilefs-server-2.44/lib/MogileFS/Test.pm --- mogilefs-server-2.44/lib/MogileFS/Test.pm 2012-06-22 06:06:54.000000000 +0000 +++ mogilefs-server-2.44/lib/MogileFS/Test.pm 2012-08-14 07:29:15.000000000 +0000 @@ -123,6 +123,7 @@ die "Failed: tracker already running on port 7500?\n" if $conn; $ENV{PERL5LIB} .= ":$Bin/../lib"; my @args = ("$Bin/../mogstored", + "--skipconfig", "--httplisten=$ip:7500", "--mgmtlisten=$ip:7501", "--maxconns=1000", # because we're not root, put it below 1024 diff -Nru mogilefs-server-2.44/lib/MogileFS/Worker/Reaper.pm mogilefs-server-2.44/lib/MogileFS/Worker/Reaper.pm --- mogilefs-server-2.44/lib/MogileFS/Worker/Reaper.pm 2012-06-22 06:06:54.000000000 +0000 +++ mogilefs-server-2.44/lib/MogileFS/Worker/Reaper.pm 2012-08-14 07:29:15.000000000 +0000 @@ -4,8 +4,13 @@ use strict; use base 'MogileFS::Worker'; use MogileFS::Server; -use MogileFS::Util qw(every error debug); +use MogileFS::Util qw(error debug); use MogileFS::Config qw(DEVICE_SUMMARY_CACHE_TIMEOUT); +use constant REAP_INTERVAL => 5; +use constant REAP_BACKOFF_MIN => 60; + +# completely forget about devices we've reaped after 2 hours of idleness +use constant REAP_BACKOFF_MAX => 7200; sub new { my ($class, $psock) = @_; @@ -19,56 +24,154 @@ return 240; } -my %all_empty; # devid -> bool, if all empty of files in file_on +# order is important here: +# +# first, add fid to file_to_replicate table. it +# shouldn't matter if the replicator gets to this +# before the subsequent 'forget_about' method, as the +# replicator will treat dead file_on devices as +# non-existent anyway. however, it is important that +# we enqueue it for replication first, before we +# forget about that file_on row, otherwise a failure +# after/during 'forget_about' could leave a stranded +# file on a dead device and we'd never fix it. +sub reap_fid { + my ($self, $fid, $dev) = @_; + + $fid->enqueue_for_replication(in => 1); + $dev->forget_about($fid); +} + +# this returns 1000 by default +sub reaper_inject_limit { + my ($self) = @_; + + my $sto = Mgd::get_store(); + my $max = MogileFS::Config->server_setting_cached('queue_size_for_reaper'); + my $limit = MogileFS::Config->server_setting_cached('queue_rate_for_reaper') || 1000; + + # max defaults to zero, meaning we inject $limit every wakeup + if ($max) { + # if a queue size limit is configured for reaper, prevent too many + # files from entering the repl queue: + my $len = $sto->deferred_repl_queue_length; + my $space_left = $max - $len; + + $limit = $space_left if ($limit > $space_left); + + # limit may end up being negative here since other processes + # can inject into the deferred replication queue, reaper is + # the only one which can respect this queue size + $limit = 0 if $limit < 0; + } + + return $limit; +} + +# we pass the $devid here (instead of a Device object) to avoid +# potential memory leaks since this sub reschedules itself to run +# forever. $delay is the current delay we were scheduled at +sub reap_dev { + my ($self, $devid, $delay) = @_; + my $limit = $self->reaper_inject_limit; + + # just in case a user mistakenly nuked a devid from the device table: + my $dev = Mgd::device_factory()->get_by_id($devid); + unless ($dev) { + error("No device row for dev$devid, cannot reap"); + $delay = undef; + } + + # limit == 0 if we hit the queue size limit, we'll just reschedule + if ($limit && $dev) { + my $sto = Mgd::get_store(); + my $lock = "mgfs:reaper"; + my $lock_timeout = $self->watchdog_timeout / 4; + my @fids; + + if ($sto->get_lock($lock, $lock_timeout)) { + @fids = $dev->fid_list(limit => $limit); + if (@fids) { + $self->still_alive; + foreach my $fid (@fids) { + $self->reap_fid($fid, $dev); + } + } + $sto->release_lock($lock); + + # if we've found any FIDs (perhaps even while backing off) + # ensure we try to find more soon: + if (@fids) { + $delay = REAP_INTERVAL; + } else { + $delay = $self->reap_dev_backoff_delay($delay); + } + } else { + # No lock after a long lock_timeout? Try again soon. + # We should never get here under MySQL, and rarely for other DBs. + debug("get_lock($lock, $lock_timeout) failed"); + $delay = REAP_INTERVAL; + } + } + + return unless defined $delay; + + # schedule another update, delay could be REAP_BACKOFF_MAX + Danga::Socket->AddTimer($delay, sub { $self->reap_dev($devid, $delay) }); +} + +# called when we're hopefully all done with a device, but reschedule +# into the future in case the replicator had an out-of-date cache and the +# "dead" device was actually writable. +sub reap_dev_backoff_delay { + my ($self, $delay) = @_; + + return REAP_BACKOFF_MIN if ($delay < REAP_BACKOFF_MIN); + $delay *= 2; + return $delay > REAP_BACKOFF_MAX ? undef : $delay; +} + +# looks for dead devices sub work { my $self = shift; - every(5, sub { + # we just forked from our parent process, also using Danga::Socket, + # so we need to lose all that state and start afresh. + Danga::Socket->Reset; + + # ensure we get monitor updates + Danga::Socket->AddOtherFds($self->psock_fd, sub{ $self->read_from_parent }); + + my %devid_seen; + my $reap_check; + $reap_check = sub { # get db and note we're starting a run debug("Reaper running; looking for dead devices"); + $self->still_alive; foreach my $dev (grep { $_->dstate->is_perm_dead } Mgd::device_factory()->get_all) { - my $devid = $dev->id; - next if $all_empty{$devid}; + next if $devid_seen{$dev->id}; - my @fids = $dev->fid_list(limit => 1000); - unless (@fids) { - $all_empty{$devid} = 1; - next; - } - $self->still_alive; + # delay the initial device reap in case any replicator cache + # thinks the device is still alive + Danga::Socket->AddTimer(DEVICE_SUMMARY_CACHE_TIMEOUT + 1, sub { + $self->reap_dev($dev->id, REAP_INTERVAL); + }); + + # once we've seen a device, reap_dev will takeover scheduling + # reaping for the given device. + $devid_seen{$dev->id} = 1; + } - foreach my $fid (@fids) { - # order is important here: + Danga::Socket->AddTimer(REAP_INTERVAL, $reap_check); + }; - # first, add fid to file_to_replicate table. it - # shouldn't matter if the replicator gets to this - # before the subsequent 'forget_about' method, as the - # replicator will treat dead file_on devices as - # non-existent anyway. however, it is important that - # we enqueue it for replication first, before we - # forget about that file_on row, otherwise a failure - # after/during 'forget_about' could leave a stranded - # file on a dead device and we'd never fix it. - # - # and just for extra safety, in case replication happened - # on another machine after 'enqueue_for_replication' but - # before 'forget_about', and that other machine hadn't yet - # re-read the device table to learn that this device - # was dead, we delay the replication for the amount of time - # that the device summary table is valid for (presumably - # the other trackers are running identical software, or - # at least have the same timeout value) - - $fid->enqueue_for_replication(in => DEVICE_SUMMARY_CACHE_TIMEOUT + 1); - $dev->forget_about($fid); - $fid->update_devcount; - } - } - }); + # kick off the reaper and loop forever + $reap_check->(); + Danga::Socket->EventLoop; } 1; diff -Nru mogilefs-server-2.44/lib/MogileFS/Worker/Replicate.pm mogilefs-server-2.44/lib/MogileFS/Worker/Replicate.pm --- mogilefs-server-2.44/lib/MogileFS/Worker/Replicate.pm 2012-06-22 06:06:54.000000000 +0000 +++ mogilefs-server-2.44/lib/MogileFS/Worker/Replicate.pm 2012-08-14 07:29:15.000000000 +0000 @@ -15,12 +15,6 @@ use Digest; use MIME::Base64 qw(encode_base64); -# setup the value used in a 'nexttry' field to indicate that this item will never -# actually be tried again and require some sort of manual intervention. -use constant ENDOFTIME => 2147483647; - -sub end_of_time { ENDOFTIME; } - sub new { my ($class, $psock) = @_; my $self = fields::new($class); @@ -140,7 +134,7 @@ # special; update to a time that won't happen again, # as we've encountered a scenario in which case we're # really hosed - $sto->reschedule_file_to_replicate_absolute($fid, ENDOFTIME); + $sto->reschedule_file_to_replicate_absolute($fid, $sto->end_of_time); } elsif ($type eq "offset") { $sto->reschedule_file_to_replicate_relative($fid, $delay+0); } else { diff -Nru mogilefs-server-2.44/lib/Mogstored/ChildProcess/IOStat.pm mogilefs-server-2.44/lib/Mogstored/ChildProcess/IOStat.pm --- mogilefs-server-2.44/lib/Mogstored/ChildProcess/IOStat.pm 2012-06-22 06:06:54.000000000 +0000 +++ mogilefs-server-2.44/lib/Mogstored/ChildProcess/IOStat.pm 2012-08-14 07:29:15.000000000 +0000 @@ -4,7 +4,7 @@ my $docroot; -my $iostat_cmd = "iostat -dx 1 30"; +my $iostat_cmd = $ENV{MOG_IOSTAT_CMD} || "iostat -dx 1 30"; if ($^O =~ /darwin/) { $iostat_cmd =~ s/x// } sub pre_exec_init { @@ -66,26 +66,21 @@ my $mog_sysid = mog_sysid_map(); # 5 (mogdevid) -> 2340 (os devid) my $dev_sysid = {}; # hashref, populated lazily: { /dev/sdg => system dev_t } my %devt_util; # dev_t => 52.55 - my $init = 0; + my $stats = 0; while (<$iofh>) { - if (m/^Device:/) { - %devt_util = (); - $init = 1; - next; - } - next unless $init; - if (m/^ (\S+) .*? ([\d.]+) \n/x) { + if (m/^\s*(\S+)\s.*?([\d.]+)\s*$/) { my ($devnode, $util) = ("/dev/$1", $2); unless (exists $dev_sysid->{$devnode}) { $dev_sysid->{$devnode} = (stat($devnode))[6]; # rdev } my $devt = $dev_sysid->{$devnode}; $devt_util{$devt} = $util; - next; - } - # blank line is the end. - if (m!^\s*\n!) { - $init = 0; + $stats++; + } elsif ($stats) { + # blank line is the end, or any other line we don't understand + # if we have stats, we print them, otherwise do nothing + $stats = 0; + my $ret = ""; foreach my $mogdevid (sort { $a <=> $b } keys %$mog_sysid) { my $devt = $mog_sysid->{$mogdevid}; @@ -96,7 +91,7 @@ print $ret; $check_for_parent->(); - next; + %devt_util = (); } } } diff -Nru mogilefs-server-2.44/MANIFEST mogilefs-server-2.44/MANIFEST --- mogilefs-server-2.44/MANIFEST 2012-06-22 06:06:54.000000000 +0000 +++ mogilefs-server-2.44/MANIFEST 2012-08-14 07:29:15.000000000 +0000 @@ -76,6 +76,7 @@ t/40-httpfile.t t/50-checksum.t t/60-fsck.t +t/70-reaper.t t/checksum.t t/fid-stat.t t/mogstored-shutdown.t diff -Nru mogilefs-server-2.44/mogstored mogilefs-server-2.44/mogstored --- mogilefs-server-2.44/mogstored 2012-06-22 06:06:54.000000000 +0000 +++ mogilefs-server-2.44/mogstored 2012-08-14 07:29:15.000000000 +0000 @@ -37,6 +37,7 @@ my ($iostat_pipe_r, $iostat_pipe_w); # pipes for talking to iostat process # Config: +my $opt_skipconfig; my $opt_daemonize; my $opt_config; my $opt_iostat = 1; # default to on now @@ -54,6 +55,7 @@ my %config_opts = ( 'iostat' => \$opt_iostat, + 's|skipconfig' => \$opt_skipconfig, 'daemonize|d' => \$opt_daemonize, 'config=s' => \$opt_config, 'httplisten=s' => \$http_listen, @@ -70,7 +72,7 @@ unless $server =~ /^perlbal|lighttpd|apache|none$/; $opt_config = $default_config if ! $opt_config && -e $default_config; -load_config_file($opt_config => \%config_opts) if $opt_config; +load_config_file($opt_config => \%config_opts) if $opt_config && !$opt_skipconfig; # initialize basic required Perlbal machinery, for any HTTP server my $perlbal_init = qq{ diff -Nru mogilefs-server-2.44/t/70-reaper.t mogilefs-server-2.44/t/70-reaper.t --- mogilefs-server-2.44/t/70-reaper.t 1970-01-01 00:00:00.000000000 +0000 +++ mogilefs-server-2.44/t/70-reaper.t 2012-08-14 07:29:15.000000000 +0000 @@ -0,0 +1,95 @@ +# -*-perl-*- +use strict; +use warnings; +use Test::More; +use FindBin qw($Bin); +use Time::HiRes qw(sleep); +use MogileFS::Server; +use MogileFS::Test; +find_mogclient_or_skip(); +use MogileFS::Admin; +use File::Temp; + +my $sto = eval { temp_store(); }; +if (!$sto) { + plan skip_all => "Can't create temporary test database: $@"; + exit 0; +} + +my %mogroot; +$mogroot{1} = File::Temp::tempdir( CLEANUP => 1 ); +my $dev2host = { 1 => 1, 2 => 1, 3 => 1 }; +foreach (sort { $a <=> $b } keys %$dev2host) { + my $root = $mogroot{$dev2host->{$_}}; + mkdir("$root/dev$_") or die "Failed to create dev$_ dir: $!"; +} + +my $ms1 = create_mogstored("127.0.1.1", $mogroot{1}); +ok($ms1, "got mogstored"); + +while (! -e "$mogroot{1}/dev1/usage" || + ! -e "$mogroot{1}/dev2/usage" || + ! -e "$mogroot{1}/dev3/usage") { + print "Waiting on usage...\n"; + sleep(.25); +} + +my $tmptrack = create_temp_tracker($sto); +ok($tmptrack); + +my $admin = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001'); +$admin or die "failed to create admin socket: $!"; +my $moga = MogileFS::Admin->new(hosts => [ "127.0.0.1:7001" ]); +my $mogc = MogileFS::Client->new( + domain => "testdom", + hosts => [ "127.0.0.1:7001" ], + ); + +ok($tmptrack->mogadm("host", "add", "hostA", "--ip=127.0.1.1", "--status=alive"), "created hostA"); +ok($tmptrack->mogadm("device", "add", "hostA", 1), "created dev1 on hostA"); +ok($tmptrack->mogadm("device", "add", "hostA", 2), "created dev2 on hostA"); + +ok($tmptrack->mogadm("domain", "add", "testdom"), "created test domain"); +ok($tmptrack->mogadm("class", "add", "testdom", "2copies", "--mindevcount=2"), "created 2copies class in testdom"); +ok($tmptrack->mogadm("settings", "set", "queue_rate_for_reaper", 123), "set queue_rate_for_reaper"); + +# create one sample file with 2 copies +my $fh = $mogc->new_file("file1", "2copies"); +ok($fh, "got filehandle"); +ok(close($fh), "closed file"); + +my $tries; +my @urls; + +# wait for it to replicate +for ($tries = 100; $tries--; ) { + @urls = $mogc->get_paths("file1"); + last if scalar(@urls) == 2; + sleep .1; +} + +is(scalar(@urls), 2, "replicated to 2 paths"); +my $orig_urls = join("\n", sort(@urls)); + +# add a new device and mark an existing device as dead +ok($tmptrack->mogadm("device", "add", "hostA", 3), "created dev3 on hostA"); +ok($tmptrack->mogadm("device", "mark", "hostA", 2, "dead"), "mark dev2 as dead"); + +# reaper should notice the dead device in 5-10s +for ($tries = 100; $tries--; ) { + @urls = $mogc->get_paths("file1"); + last if scalar(grep(m{/dev2/}, @urls)) == 0; + sleep 0.1; +} +is(scalar(grep(m{/dev2/}, @urls)), 0, "file1 no longer references dead dev2"); + +# replicator should replicate the file within 15-30s +for ($tries = 300; $tries--; ) { + @urls = sort($mogc->get_paths("file1")); + last if (scalar(@urls) == 2) && (join("\n", @urls) ne $orig_urls); + sleep 0.1; +} +is(grep(m{/dev3/}, @urls), 1, "file1 got copied to dev3"); +is(scalar(@urls), 2, "we have 2 paths for file1 again"); + +done_testing();