From e5e86d3f4baa805a5c26c6b493f8e56e4b3633a3 Mon Sep 17 00:00:00 2001 From: dormando Date: Thu, 23 Sep 2010 16:26:51 -0700 Subject: [PATCH 001/405] search $PATH for mogadm util in test suite --- lib/MogileFS/Test.pm | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/lib/MogileFS/Test.pm b/lib/MogileFS/Test.pm index 561b6d87..75ab2466 100644 --- a/lib/MogileFS/Test.pm +++ b/lib/MogileFS/Test.pm @@ -33,7 +33,7 @@ sub find_mogclient_or_skip { } unless (eval { TrackerHandle::_mogadm_exe() }) { - warn "Can't find mogadm utility.\n"; + warn "Can't find mogadm utility $@\n"; Test::More::plan('skip_all' => "Can't find mogadm executable, necessary for testing."); } @@ -174,13 +174,15 @@ sub ipport { my $_mogadm_exe_cache; sub _mogadm_exe { return $_mogadm_exe_cache if $_mogadm_exe_cache; - foreach my $exe ("$FindBin::Bin/../../utils/mogadm", - "$FindBin::Bin/../../../utils/mogadm", - "/usr/bin/mogadm", - "/usr/sbin/mogadm", - "/usr/local/bin/mogadm", - "/usr/local/sbin/mogadm", + for my $dir ("$FindBin::Bin/../../utils", + "$FindBin::Bin/../../../utils", + split(/:/, $ENV{PATH}), + "/usr/bin", + "/usr/sbin", + "/usr/local/bin", + "/usr/local/sbin", ) { + my $exe = $dir . '/mogadm'; return $_mogadm_exe_cache = $exe if -x $exe; } die "mogadm executable not found.\n"; From fa30b6aa44e870251079edcc158388549de57060 Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 24 Sep 2010 15:13:20 -0700 Subject: [PATCH 002/405] Reduce unnecessary sleeping and parent traffic. The only place that ever told 'every' how often to sleep right was the fsck worker. Now everyone does it closer to correctly. Reducing the sleeps also required a minor overhaul of all the parent_ping calls. Workers no longer wake up unnecessarily and no should submit far fewer noise to the parent process, which should help increase throughput overall. 
--- CHANGES | 2 ++ lib/MogileFS/Util.pm | 19 +++++-------------- lib/MogileFS/Worker.pm | 3 ++- lib/MogileFS/Worker/Delete.pm | 2 -- lib/MogileFS/Worker/Fsck.pm | 10 +++------- lib/MogileFS/Worker/JobMaster.pm | 14 ++++++++++---- lib/MogileFS/Worker/Reaper.pm | 4 +--- lib/MogileFS/Worker/Replicate.pm | 16 +++------------- 8 files changed, 26 insertions(+), 44 deletions(-) diff --git a/CHANGES b/CHANGES index 6d5fea9b..dd8ce3d8 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,5 @@ + * Optimize worker sleep patterns and worker/parent communications. + * New rebalance/drain code. See the docs/wiki for how to use. ***NOTE*** Old drain/rebalance code is disabled. Setting a device into drain mode no longer does what you think it would. diff --git a/lib/MogileFS/Util.pm b/lib/MogileFS/Util.pm index 5282eef5..d6d6a48d 100644 --- a/lib/MogileFS/Util.pm +++ b/lib/MogileFS/Util.pm @@ -34,7 +34,6 @@ sub every { my $now = Time::HiRes::time(); my $took = $now - $start; my $sleep_for = defined $explicit_sleep ? $explicit_sleep : ($delay - $took); - next unless $sleep_for > 0; # simple case, not in a child process (this never happens currently) unless ($psock_fd) { @@ -42,19 +41,11 @@ sub every { next; } - while ($sleep_for > 0) { - my $last_time_pre_sleep = $now; - $worker->forget_woken_up; - if (wait_for_readability($psock_fd, $sleep_for)) { - # TODO: uncomment this and watch an idle server and how many wakeups. could optimize. - #local $Mgd::POST_SLEEP_DEBUG = 1; - #warn "WOKEN UP FROM SLEEP in $worker [$$]\n"; - $worker->read_from_parent; - next CODERUN if $worker->was_woken_up; - } - $now = Time::HiRes::time(); - $sleep_for -= ($now - $last_time_pre_sleep); - } + Time::HiRes::sleep($sleep_for) if $sleep_for > 0; + #local $Mgd::POST_SLEEP_DEBUG = 1; + # This calls read_from_parent. Workers used to needlessly call + # parent_ping constantly. 
+ $worker->parent_ping; } } diff --git a/lib/MogileFS/Worker.pm b/lib/MogileFS/Worker.pm index 3b849b0e..d432a3a3 100644 --- a/lib/MogileFS/Worker.pm +++ b/lib/MogileFS/Worker.pm @@ -74,7 +74,7 @@ sub wait_for_monitor { sub still_alive { my $self = shift; my $now = time(); - if ($now > $self->{last_ping}) { + if ($now > $self->{last_ping} + ($self->watchdog_timeout / 4)) { $self->send_to_parent(":still_alive"); # a no-op, just for the watchdog $self->{last_ping} = $now; } @@ -134,6 +134,7 @@ sub read_from_parent { # while things are immediately available, # (or optionally sleep a bit) while (MogileFS::Util::wait_for_readability(fileno($psock), $timeout)) { + $timeout = 0; # only wait on the timeout for the first read. my $buf; my $rv = sysread($psock, $buf, 1024); if (!$rv) { diff --git a/lib/MogileFS/Worker/Delete.pm b/lib/MogileFS/Worker/Delete.pm index da7ebe5d..a838e96f 100644 --- a/lib/MogileFS/Worker/Delete.pm +++ b/lib/MogileFS/Worker/Delete.pm @@ -153,7 +153,6 @@ sub process_deletes2 { while (my $todo = shift @$queue_todo) { $self->still_alive; - $self->read_from_parent; # load all the devids related to this fid, and delete. 
my $fid = MogileFS::FID->new($todo->{fid}); @@ -270,7 +269,6 @@ sub process_deletes { last if ++$done > PER_BATCH; $self->still_alive; - $self->read_from_parent; my ($fid, $devid) = @$dm; error("deleting fid $fid, on devid ".($devid || 'NULL')."...") if $Mgd::DEBUG >= 2; diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index 104dde08..13188c57 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -71,7 +71,7 @@ sub work { my $sto = Mgd::get_store(); my $max_checked = 0; - every(1.0, sub { + every(2.0, sub { my $sleep_set = shift; $nowish = time(); local $Mgd::nowish = $nowish; @@ -87,12 +87,8 @@ sub work { } my $queue_todo = $self->queue_todo('fsck'); - unless (@{$queue_todo}) { - $self->send_to_parent('worker_bored 50 fsck'); - $self->read_from_parent(1); - } else { - $self->parent_ping; - } + # This counts the same as a $self->still_alive; + $self->send_to_parent('worker_bored 50 fsck'); my @fids = (); while (my $todo = shift @{$queue_todo}) { diff --git a/lib/MogileFS/Worker/JobMaster.pm b/lib/MogileFS/Worker/JobMaster.pm index 8fc0e29c..3108f420 100644 --- a/lib/MogileFS/Worker/JobMaster.pm +++ b/lib/MogileFS/Worker/JobMaster.pm @@ -50,10 +50,12 @@ sub work { $self->send_to_parent("queue_depth all"); my $sto = Mgd::get_store(); $self->read_from_parent(1); - $self->_check_replicate_queues($sto); - $self->_check_delete_queues($sto); - $self->_check_fsck_queues($sto); - $self->_check_rebal_queues($sto); + my $active = 0; + $active += $self->_check_replicate_queues($sto); + $active += $self->_check_delete_queues($sto); + $active += $self->_check_fsck_queues($sto); + $active += $self->_check_rebal_queues($sto); + $_[0]->(0) if $active; }); } @@ -71,6 +73,7 @@ sub _check_delete_queues { $self->send_to_parent("queue_todo delete " . 
_eurl_encode_args($todo)); } + return 1; } # NOTE: we only maintain one queue per worker, but we can easily @@ -99,6 +102,7 @@ sub _check_replicate_queues { $self->send_to_parent("queue_todo replicate " . _eurl_encode_args($todo)); } + return 1; } # FSCK is going to be a little odd... We still need a single "global" @@ -127,6 +131,7 @@ sub _check_fsck_queues { for my $todo (@to_fsck) { $self->send_to_parent("queue_todo fsck " . _eurl_encode_args($todo)); } + return 1; } sub _inject_fsck_queues { @@ -179,6 +184,7 @@ sub _check_rebal_queues { $todo->{_type} = 'rebalance'; $self->send_to_parent("queue_todo rebalance " . _eurl_encode_args($todo)); } + return 1; } sub _inject_rebalance_queues { diff --git a/lib/MogileFS/Worker/Reaper.pm b/lib/MogileFS/Worker/Reaper.pm index d3f1071d..c2071d55 100644 --- a/lib/MogileFS/Worker/Reaper.pm +++ b/lib/MogileFS/Worker/Reaper.pm @@ -24,8 +24,6 @@ sub work { my $self = shift; every(5, sub { - $self->parent_ping; - # get db and note we're starting a run debug("Reaper running; looking for dead devices"); @@ -40,7 +38,7 @@ sub work { $all_empty{$devid} = 1; next; } - $self->parent_ping; + $self->still_alive; foreach my $fid (@fids) { # order is important here: diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index f1c686b0..035dd584 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -37,7 +37,7 @@ sub work { # give the monitor job 15 seconds to give us an update my $warn_after = time() + 15; - every(2.0, sub { + every(1.0, sub { # replication doesn't go well if the monitor job hasn't actively started # marking things as being available unless ($self->monitor_has_run) { @@ -49,22 +49,16 @@ sub work { $self->validate_dbh; my $dbh = $self->get_dbh or return 0; my $sto = Mgd::get_store(); - $self->send_to_parent("worker_bored 100 replicate rebalance"); - # This is here on account of not being able to block on the parent :( - $self->read_from_parent(1); - # TODO: might 
need to sort types or create priority queues in the - # parent... would want "replicate" work to happen before rebalance. + my $queue_todo = $self->queue_todo('replicate'); my $queue_todo2 = $self->queue_todo('rebalance'); unless (@$queue_todo || @$queue_todo2) { - $self->parent_ping; return; } while (my $todo = shift @$queue_todo) { my $fid = $todo->{fid}; - $self->still_alive; $self->replicate_using_torepl_table($todo); } while (my $todo = shift @$queue_todo2) { @@ -81,11 +75,7 @@ sub work { # manually re-run rebalance to retry. $sto->delete_fid_from_file_to_queue($todo->{fid}, REBAL_QUEUE); } - # if replicators are otherwise idle, use them to make the world - # better, rebalancing things (if enabled), and draining devices (if - # any are marked drain) - #$self->rebalance_devices; - #$self->drain_devices; + $_[0]->(0); # don't sleep. }); } From a70a05f0b21e7027128e6d409a912648e6108d16 Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 24 Sep 2010 17:48:41 -0700 Subject: [PATCH 003/405] filter all source devices. --- lib/MogileFS/Rebalance.pm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Rebalance.pm b/lib/MogileFS/Rebalance.pm index 0791abe7..1e62b329 100644 --- a/lib/MogileFS/Rebalance.pm +++ b/lib/MogileFS/Rebalance.pm @@ -406,7 +406,8 @@ sub filter_dest_devices { # skip anything we would source from. # FIXME: ends up not skipping stuff out of completed_devs? :/ - my %sdevs = map { $_ => 1 } @{$state->{source_devs}}; + my %sdevs = map { $_ => 1 } @{$state->{source_devs}}, + @{$state->{completed_devs}}, $state->{sdev_current}; my @devs = grep { ! $sdevs{$_} } @$devs; my @ddevs = (); From c3c262d5357824818a145cc31fe3549fafe0d0a8 Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 24 Sep 2010 23:42:21 -0700 Subject: [PATCH 004/405] allow rebalance to be paused/restarted/reset also note times at which these things happen. 
--- lib/MogileFS/Rebalance.pm | 21 ++++++++++++++++ lib/MogileFS/Worker/JobMaster.pm | 15 ++++++++++++ lib/MogileFS/Worker/Query.pm | 41 +++++++++++++++++++++----------- 3 files changed, 63 insertions(+), 14 deletions(-) diff --git a/lib/MogileFS/Rebalance.pm b/lib/MogileFS/Rebalance.pm index 1e62b329..e8059c80 100644 --- a/lib/MogileFS/Rebalance.pm +++ b/lib/MogileFS/Rebalance.pm @@ -53,6 +53,9 @@ my %default_state = ( sdev_limit => 0, fids_queued => 0, bytes_queued => 0, + time_started => 0, + time_finished => 0, + time_stopped => 0, ); sub new { @@ -83,9 +86,27 @@ sub init { # If we don't have an initial source device list, discover them. # Used to filter destination devices later. $state{source_devs} = $self->filter_source_devices($devs); + $state{time_started} = time(); $self->{state} = \%state; } +sub stop { + my $self = shift; + my $p = $self->{policy}; + my $s = $self->{state}; + my $sdev = $self->{sdev_current}; + unless ($p->{leave_in_drain_mode}) { + MogileFS::Device->of_devid($sdev)->set_state('alive') if $sdev; + } + $s->{time_stopped} = time(); +} + +sub finish { + my $self = shift; + my $s = $self->{state}; + $s->{time_finished} = time(); +} + # Resume from saved as_string state. sub load_state { my $self = shift; diff --git a/lib/MogileFS/Worker/JobMaster.pm b/lib/MogileFS/Worker/JobMaster.pm index 3108f420..572dde27 100644 --- a/lib/MogileFS/Worker/JobMaster.pm +++ b/lib/MogileFS/Worker/JobMaster.pm @@ -205,6 +205,7 @@ sub _inject_rebalance_queues { # end of a run or ... I guess whenever the host sees it's not the rebal # host. 
my $rebal = MogileFS::Rebalance->new; + my $signal = MogileFS::Config->server_setting('rebal_signal'); my $rebal_pol = MogileFS::Config->server_setting('rebal_policy'); my $rebal_state = MogileFS::Config->server_setting('rebal_state'); $rebal->policy($rebal_pol); @@ -216,6 +217,17 @@ sub _inject_rebalance_queues { $rebal->init(\@devs); } + # Stopping is done via signal so we can note stop time in the state, + # and un-drain any devices that should be un-drained. + if ($signal && $signal eq 'stop') { + $rebal->stop; + $rebal_state = $rebal->save_state; + $sto->set_server_setting('rebal_signal', undef); + $sto->set_server_setting("rebal_host", undef); + $sto->set_server_setting('rebal_state', $rebal_state); + return; + } + my $devfids = $rebal->next_fids_to_rebalance(\@devs, $sto, $to_inject); # undefined means there's no work left. @@ -223,6 +235,9 @@ sub _inject_rebalance_queues { # Append some info to a rebalance log table? # Leave state in the system for inspection post-run. # TODO: Emit some sort of syslog/status line. 
+ $rebal->finish; + $rebal_state = $rebal->save_state; + $sto->set_server_setting('rebal_state', $rebal_state); $sto->set_server_setting("rebal_host", undef); return; } diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 02305d53..a8cc6f09 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -1446,20 +1446,23 @@ sub cmd_rebalance_start { return $self->err_line("rebal_running", "rebalance is already running") if $rebal_host; return $self->err_line("fsck_running", "fsck running; cannot run rebalance at same time") if $fsck_host; - my $rebal_pol = MogileFS::Config->server_setting('rebal_policy'); - return $self->err_line('no_rebal_policy') unless $rebal_pol; - - my $rebal = MogileFS::Rebalance->new; - $rebal->policy($rebal_pol); - my @devs = MogileFS::Device->devices; - $rebal->init(\@devs); - my $sdevs = $rebal->source_devices; - - my $state = $rebal->save_state; - MogileFS::Config->set_server_setting('rebal_state', $state); + my $rebal_state = MogileFS::Config->server_setting('rebal_state'); + unless ($rebal_state) { + my $rebal_pol = MogileFS::Config->server_setting('rebal_policy'); + return $self->err_line('no_rebal_policy') unless $rebal_pol; + + my $rebal = MogileFS::Rebalance->new; + $rebal->policy($rebal_pol); + my @devs = MogileFS::Device->devices; + $rebal->init(\@devs); + my $sdevs = $rebal->source_devices; + + $rebal_state = $rebal->save_state; + MogileFS::Config->set_server_setting('rebal_state', $rebal_state); + } # TODO: register start time somewhere. 
MogileFS::Config->set_server_setting('rebal_host', MogileFS::Config->hostname); - return $self->ok_line({ state => $state }); + return $self->ok_line({ state => $rebal_state }); } sub cmd_rebalance_test { @@ -1485,14 +1488,23 @@ sub cmd_rebalance_test { return $self->ok_line($ret); } +sub cmd_rebalance_reset { + my MogileFS::Worker::Query $self = shift; + my $host = MogileFS::Config->server_setting('rebal_host'); + if ($host) { + return $self->err_line("rebal_running", "rebalance is running") if $host; + } + MogileFS::Config->set_server_setting('rebal_state', undef); + return $self->ok_line; +} + sub cmd_rebalance_stop { my MogileFS::Worker::Query $self = shift; my $host = MogileFS::Config->server_setting('rebal_host'); unless ($host) { return $self->err_line('rebal_not_started'); } - # TODO: put stop time somewhere. - MogileFS::Config->set_server_setting('rebal_host', undef); + MogileFS::Config->set_server_setting('rebal_signal', 'stop'); return $self->ok_line; } @@ -1513,6 +1525,7 @@ sub cmd_rebalance_set_policy { } MogileFS::Config->set_server_setting('rebal_policy', $args->{policy}); + MogileFS::Config->set_server_setting('rebal_state', undef); return $self->ok_line; } From a13ea17d749f27ea560e8d7b4f4759eae11b63fb Mon Sep 17 00:00:00 2001 From: dormando Date: Tue, 28 Sep 2010 15:51:23 -0700 Subject: [PATCH 005/405] modify some dotfiles. 
add a gitignore and a temporary .shipit --- .gitignore | 3 +++ .shipit | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..0b5bd399 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +Makefile +blib +pm_to_blib diff --git a/.shipit b/.shipit index ca301200..b5720000 100644 --- a/.shipit +++ b/.shipit @@ -1,4 +1,4 @@ -steps = FindVersion, ChangeVersion, CheckChangeLog, DistTest, Commit, Tag, MakeDist, UploadCPAN +steps = FindVersion, ChangeVersion, CheckChangeLog, DistTest, Commit, Tag, MakeDist -svn.tagpattern = mogilefs-server-%v +git.tagpattern = %v From 87f9bf5170417d40619a34bb3916308fa56a271c Mon Sep 17 00:00:00 2001 From: dormando Date: Tue, 28 Sep 2010 15:54:24 -0700 Subject: [PATCH 006/405] Checking in changes prior to tagging of version 2.40. Changelog diff is: diff --git a/CHANGES b/CHANGES index dd8ce3d..bb9514d 100644 --- a/CHANGES +++ b/CHANGES @@ -1,8 +1,11 @@ +2010-09-28: Release version 2.40 + * Optimize worker sleep patterns and worker/parent communications. * New rebalance/drain code. See the docs/wiki for how to use. ***NOTE*** Old drain/rebalance code is disabled. Setting a device into drain mode no longer does what you think it would. + http://code.google.com/p/mogilefs/wiki/Rebalance 2010-08-13: Release version 2.37 --- CHANGES | 3 +++ MANIFEST | 2 ++ MANIFEST.SKIP | 1 + lib/MogileFS/Server.pm | 2 +- 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index dd8ce3d8..bb9514d8 100644 --- a/CHANGES +++ b/CHANGES @@ -1,8 +1,11 @@ +2010-09-28: Release version 2.40 + * Optimize worker sleep patterns and worker/parent communications. * New rebalance/drain code. See the docs/wiki for how to use. ***NOTE*** Old drain/rebalance code is disabled. Setting a device into drain mode no longer does what you think it would. 
+ http://code.google.com/p/mogilefs/wiki/Rebalance 2010-08-13: Release version 2.37 diff --git a/MANIFEST b/MANIFEST index b06fb781..f290d4bb 100644 --- a/MANIFEST +++ b/MANIFEST @@ -62,6 +62,7 @@ lib/MogileFS/HTTPFile.pm lib/MogileFS/IOStatWatcher.pm lib/MogileFS/Overview.pm lib/MogileFS/ProcManager.pm +lib/MogileFS/Rebalance.pm lib/MogileFS/ReplicationPolicy.pm lib/MogileFS/ReplicationPolicy/MultipleHosts.pm lib/MogileFS/ReplicationPolicy/Union.pm @@ -102,6 +103,7 @@ mogstored t/00-startup.t t/10-weighting.t t/20-filepaths.t +t/30-rebalance.t t/domains-classes.t t/fid-stat.t t/hosts-devices.t diff --git a/MANIFEST.SKIP b/MANIFEST.SKIP index 51132237..602bcd62 100644 --- a/MANIFEST.SKIP +++ b/MANIFEST.SKIP @@ -14,6 +14,7 @@ _blib$ \bdebian\b \bconf\b \.svn +\.git dev-killmogstored.sh make-par.sh mogstored.pp diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 27f6081f..c1620e4e 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.37"; +$VERSION = "2.40"; =head1 NAME From ab9c945f966a9fee73244da556f0f381912ba4b4 Mon Sep 17 00:00:00 2001 From: dormando Date: Tue, 28 Sep 2010 15:56:17 -0700 Subject: [PATCH 007/405] add UploadCPAN back to .shipit --- .shipit | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.shipit b/.shipit index b5720000..a797898d 100644 --- a/.shipit +++ b/.shipit @@ -1,4 +1,4 @@ -steps = FindVersion, ChangeVersion, CheckChangeLog, DistTest, Commit, Tag, MakeDist +steps = FindVersion, ChangeVersion, CheckChangeLog, DistTest, Commit, Tag, MakeDist, UploadCPAN git.tagpattern = %v From 8c528211e89c9507a3a0f66f9a29023dab0b5a9f Mon Sep 17 00:00:00 2001 From: dormando Date: Tue, 28 Sep 2010 16:01:07 -0700 Subject: [PATCH 008/405] update the specfile. 
not worth re-tagging since the spec doesn't go into the tarball yet :/ need to fix shipit for specfile updates as I want to include it for future rels --- MogileFS-Server.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MogileFS-Server.spec b/MogileFS-Server.spec index 50147b16..b45b9ccd 100644 --- a/MogileFS-Server.spec +++ b/MogileFS-Server.spec @@ -2,7 +2,7 @@ name: MogileFS-Server summary: MogileFS-Server - MogileFS Server daemons and utilities. -version: 2.36 +version: 2.40 release: 2%{?dist} vendor: Alan Kasindorf packager: Jonathan Steinert From 6034bfb6cabce1c5456ce1c55f76a0a2aa52cd52 Mon Sep 17 00:00:00 2001 From: dormando Date: Tue, 28 Sep 2010 16:10:28 -0700 Subject: [PATCH 009/405] miyagawa pointed out the NAME was wrong. --- Makefile.PL | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.PL b/Makefile.PL index 6dcbb9e5..e425c64b 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -16,7 +16,7 @@ $exefiles = ["mogstored"] if exists $ENV{DANGABUILD_MOGSTOREDONLY}; $exefiles = ["mogilefsd"] if exists $ENV{DANGABUILD_MOGILEFSDONLY}; WriteMakefile( - NAME => 'mogilefs-server', + NAME => 'MogileFS::Server', VERSION_FROM => 'lib/MogileFS/Server.pm', AUTHOR => 'Brad Fitzpatrick ', ABSTRACT_FROM => 'lib/MogileFS/Server.pm', From 26ef3b3000f4b02d9067fbe28bdc535d7533d3a1 Mon Sep 17 00:00:00 2001 From: Jonathan Steinert Date: Thu, 30 Sep 2010 16:37:09 -0700 Subject: [PATCH 010/405] Update specfile version. --- MogileFS-Server.spec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MogileFS-Server.spec b/MogileFS-Server.spec index 50147b16..b4023d4e 100644 --- a/MogileFS-Server.spec +++ b/MogileFS-Server.spec @@ -2,8 +2,8 @@ name: MogileFS-Server summary: MogileFS-Server - MogileFS Server daemons and utilities. 
-version: 2.36 -release: 2%{?dist} +version: 2.37 +release: 1%{?dist} vendor: Alan Kasindorf packager: Jonathan Steinert license: Artistic From 935d9e49b12d3952d6cafc5e9e62c5e2a3def6f5 Mon Sep 17 00:00:00 2001 From: Jonathan Steinert Date: Mon, 4 Oct 2010 14:52:48 -0700 Subject: [PATCH 011/405] Change source tarball name in specfile --- MogileFS-Server.spec | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MogileFS-Server.spec b/MogileFS-Server.spec index 3bcaf62f..640fcba5 100644 --- a/MogileFS-Server.spec +++ b/MogileFS-Server.spec @@ -3,14 +3,14 @@ name: MogileFS-Server summary: MogileFS-Server - MogileFS Server daemons and utilities. version: 2.40 -release: 1%{?dist} +release: 2%{?dist} vendor: Alan Kasindorf packager: Jonathan Steinert license: Artistic group: Applications/CPAN buildroot: %{_tmppath}/%{name}-%{version}-%(id -u -n) buildarch: noarch -source: mogilefs-server-%{version}.tar.gz +source: MogileFS-Server-%{version}.tar.gz autoreq: no requires: MogileFS-Server-mogilefsd = %{version}-%{release} requires: MogileFS-Server-mogstored = %{version}-%{release} @@ -26,7 +26,7 @@ This is a dummy package which depends on all the others so you can install them %prep rm -rf "%{buildroot}" -%setup -n mogilefs-server-%{version} +%setup -n MogileFS-Server-%{version} %build %{__perl} Makefile.PL INSTALLDIRS="vendor" PREFIX=%{buildroot}%{_prefix} From 291764f6eefe58dcb90c564d3aab7a6c4a936299 Mon Sep 17 00:00:00 2001 From: dormando Date: Wed, 6 Oct 2010 14:44:05 -0700 Subject: [PATCH 012/405] remove single line fail that prevents upgrading. 
--- lib/MogileFS/Worker/Replicate.pm | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index 035dd584..7131f156 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -11,7 +11,6 @@ use List::Util (); use MogileFS::Util qw(error every debug); use MogileFS::Config; use MogileFS::Class; -use MogileFS::RebalancePolicy::DrainDevices; use MogileFS::ReplicationRequest qw(rr_upgrade); # setup the value used in a 'nexttry' field to indicate that this item will never From 74b06508869dda2610d43184f5e323370e36e5c7 Mon Sep 17 00:00:00 2001 From: dormando Date: Wed, 6 Oct 2010 14:45:32 -0700 Subject: [PATCH 013/405] Checking in changes prior to tagging of version 2.41. Changelog diff is: diff --git a/CHANGES b/CHANGES index bb9514d..a320e87 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,7 @@ +2010-10-06: Release version 2.41 + + * One line change that to fix uprading to 2.40 + 2010-09-28: Release version 2.40 * Optimize worker sleep patterns and worker/parent communications. --- CHANGES | 4 ++++ lib/MogileFS/Server.pm | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index bb9514d8..a320e879 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,7 @@ +2010-10-06: Release version 2.41 + + * One line change that to fix uprading to 2.40 + 2010-09-28: Release version 2.40 * Optimize worker sleep patterns and worker/parent communications. diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index c1620e4e..4613abde 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.40"; +$VERSION = "2.41"; =head1 NAME From b4d3f65d7ddfc88c5050965cc06a406921f3bd86 Mon Sep 17 00:00:00 2001 From: dormando Date: Wed, 6 Oct 2010 14:46:03 -0700 Subject: [PATCH 014/405] bump spec. still not included in tarball.. 
--- MogileFS-Server.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MogileFS-Server.spec b/MogileFS-Server.spec index 640fcba5..29317732 100644 --- a/MogileFS-Server.spec +++ b/MogileFS-Server.spec @@ -2,7 +2,7 @@ name: MogileFS-Server summary: MogileFS-Server - MogileFS Server daemons and utilities. -version: 2.40 +version: 2.41 release: 2%{?dist} vendor: Alan Kasindorf packager: Jonathan Steinert From ff685ed7c86e483060528dd74c48f782b06e6b8d Mon Sep 17 00:00:00 2001 From: Jonathan Steinert Date: Wed, 6 Oct 2010 22:30:53 -0700 Subject: [PATCH 015/405] Ignore INSTALL_BASE env while building an RPM. --- MogileFS-Server.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MogileFS-Server.spec b/MogileFS-Server.spec index 29317732..b0f63517 100644 --- a/MogileFS-Server.spec +++ b/MogileFS-Server.spec @@ -29,7 +29,7 @@ rm -rf "%{buildroot}" %setup -n MogileFS-Server-%{version} %build -%{__perl} Makefile.PL INSTALLDIRS="vendor" PREFIX=%{buildroot}%{_prefix} +%{__perl} Makefile.PL INSTALLDIRS="vendor" PREFIX=%{buildroot}%{_prefix} INSTALL_BASE= make all make test From 9f471a6bdc13aa8dd291d9cb11ada89b40c7fcbf Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 8 Oct 2010 21:20:54 -0700 Subject: [PATCH 016/405] hopefully fix postgres schema upgrade issue. 
--- CHANGES | 2 ++ lib/MogileFS/Store/Postgres.pm | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index a320e879..1e792631 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,5 @@ + * Fix Postgres schema upgrade issue (hopefully) + 2010-10-06: Release version 2.41 * One line change that to fix uprading to 2.40 diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index 56cf5da0..42de5d03 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -293,7 +293,7 @@ sub upgrade_add_device_drain { sub upgrade_modify_server_settings_value { my $self = shift; unless ($self->column_type("server_settings", "value" =~ /text/i)) { - $self->dowell("ALTER TABLE server_settings MODIFY COLUMN value TEXT"); + $self->dowell("ALTER TABLE server_settings ALTER COLUMN value TYPE TEXT"); } } From 6d4052dd66d6c08c00d6aa698c05239fce9509a6 Mon Sep 17 00:00:00 2001 From: Andre Pascha Date: Fri, 8 Oct 2010 21:32:34 -0700 Subject: [PATCH 017/405] Make FSCK work again broken by dormando, fix verified by dormando. seems to work now... didn't realize FSCK doesn't have any tests. --- CHANGES | 2 ++ lib/MogileFS/Store.pm | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index 1e792631..2ea213da 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,5 @@ + * Make FSCK run again (Andre Pascha) + * Fix Postgres schema upgrade issue (hopefully) 2010-10-06: Release version 2.41 diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index b118c313..b05ec608 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1315,7 +1315,7 @@ sub enqueue_many_for_todo { } else { $self->dbh->do($self->ignore_replace . " INTO file_to_queue (fid, type, nexttry) VALUES " . - join(",", map { "(" . int($_->{fid}) . ", $type, $nexttry)" } @$fidids)); + join(",", map { "(" . int($_) . 
", $type, $nexttry)" } @$fidids)); } }); $self->condthrow; @@ -1418,7 +1418,7 @@ sub get_fidids_above_id { my $dbh = $self->dbh; my $fidids = $dbh->selectcol_arrayref(qq{SELECT fid FROM file WHERE fid > ? - ORDER BY fid LIMIT $limit}); + ORDER BY fid LIMIT $limit}, undef, $fidid); return $fidids; } From 6c097d0ec40ac8dcb2d9e8a1b8a78247089005ae Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 8 Oct 2010 21:34:46 -0700 Subject: [PATCH 018/405] Checking in changes prior to tagging of version 2.42. Changelog diff is: diff --git a/CHANGES b/CHANGES index 2ea213d..43f035d 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,5 @@ +2010-10-08: Release version 2.42 + * Make FSCK run again (Andre Pascha) * Fix Postgres schema upgrade issue (hopefully) --- CHANGES | 2 ++ MogileFS-Server.spec | 2 +- lib/MogileFS/Server.pm | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index 2ea213da..43f035d9 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,5 @@ +2010-10-08: Release version 2.42 + * Make FSCK run again (Andre Pascha) * Fix Postgres schema upgrade issue (hopefully) diff --git a/MogileFS-Server.spec b/MogileFS-Server.spec index b0f63517..35078957 100644 --- a/MogileFS-Server.spec +++ b/MogileFS-Server.spec @@ -2,7 +2,7 @@ name: MogileFS-Server summary: MogileFS-Server - MogileFS Server daemons and utilities. -version: 2.41 +version: 2.42 release: 2%{?dist} vendor: Alan Kasindorf packager: Jonathan Steinert diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 4613abde..45655ff4 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.41"; +$VERSION = "2.42"; =head1 NAME From 9cdf0bdf7d825914adedb1bc68710b61485fa0a7 Mon Sep 17 00:00:00 2001 From: dormando Date: Sat, 9 Oct 2010 21:19:57 -0700 Subject: [PATCH 019/405] make over replicate/drain work again. 
--- lib/MogileFS/Worker/Replicate.pm | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index 7131f156..db134f01 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -204,6 +204,7 @@ sub replicate_using_torepl_table { # Return 1 on success, 0 on failure. sub rebalance_devfid { my ($self, $devfid, $opts) = @_; + $opts ||= {}; MogileFS::Util::okay_args($opts, qw(avoid_devids target_devids)); my $fid = $devfid->fid; @@ -317,7 +318,7 @@ sub replicate { my $sdevid = delete $opts{'source_devid'}; my $mask_devids = delete $opts{'mask_devids'} || {}; my $avoid_devids = delete $opts{'avoid_devids'} || {}; - my $target_devids = delete $opts{'target_devids'}; # inverse of avoid_devids. + my $target_devids = delete $opts{'target_devids'} || []; # inverse of avoid_devids. die "unknown_opts" if %opts; die unless ref $mask_devids eq "HASH"; @@ -403,7 +404,7 @@ sub replicate { my $copy_err; my $dest_devs = $devs; - if ($target_devids) { + if (@$target_devids) { $dest_devs = {map { $_ => $devs->{$_} } @$target_devids}; } From ac5fbe3f75650c4e2c20a7e29f72b18025ea537d Mon Sep 17 00:00:00 2001 From: dormando Date: Sun, 10 Oct 2010 02:24:16 -0700 Subject: [PATCH 020/405] allow dropping mindevcount from > 1 to 1. old bug in the special case here. dropping from mindevcount > 1 down to 1 would always short circuit as GOOD. Now it can drop through to TOO_GOOD. --- lib/MogileFS/ReplicationPolicy/MultipleHosts.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/ReplicationPolicy/MultipleHosts.pm b/lib/MogileFS/ReplicationPolicy/MultipleHosts.pm index bd25f98d..84a569d3 100644 --- a/lib/MogileFS/ReplicationPolicy/MultipleHosts.pm +++ b/lib/MogileFS/ReplicationPolicy/MultipleHosts.pm @@ -44,7 +44,7 @@ sub replicate_to { my $already_on = @$on_devs; # a silly special case, bail out early. 
- return ALL_GOOD if $min == 1 && $already_on; + return ALL_GOOD if $min == 1 && $already_on == 1; # total disks available which are candidates for having files on them my $total_disks = scalar grep { $_->dstate->should_have_files } values %$all_devs; From 6933dc63cc609e766354090484ab9d17ec6b6282 Mon Sep 17 00:00:00 2001 From: dormando Date: Sun, 10 Oct 2010 02:28:22 -0700 Subject: [PATCH 021/405] CHANGES update. --- CHANGES | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGES b/CHANGES index 43f035d9..583afd64 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,8 @@ + * Make over-replication fixable again. (sigh) + + * Fix old bug in MultipleHosts() which would not let you drop from + mindev > 1 down to 1. + 2010-10-08: Release version 2.42 * Make FSCK run again (Andre Pascha) From 0f266403be80e93e7f94bec4644538e3d8125aea Mon Sep 17 00:00:00 2001 From: dormando Date: Sun, 10 Oct 2010 23:46:47 -0700 Subject: [PATCH 022/405] Checking in changes prior to tagging of version 2.43. Changelog diff is: diff --git a/CHANGES b/CHANGES index 583afd6..bef61ed 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,5 @@ +2010-10-10: Release version 2.43 + * Make over-replication fixable again. (sigh) * Fix old bug in MultipleHosts() which would not let you drop from --- CHANGES | 2 ++ MogileFS-Server.spec | 2 +- lib/MogileFS/Server.pm | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index 583afd64..bef61ed8 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,5 @@ +2010-10-10: Release version 2.43 + * Make over-replication fixable again. (sigh) * Fix old bug in MultipleHosts() which would not let you drop from diff --git a/MogileFS-Server.spec b/MogileFS-Server.spec index 35078957..866abb08 100644 --- a/MogileFS-Server.spec +++ b/MogileFS-Server.spec @@ -2,7 +2,7 @@ name: MogileFS-Server summary: MogileFS-Server - MogileFS Server daemons and utilities. 
-version: 2.42 +version: 2.43 release: 2%{?dist} vendor: Alan Kasindorf packager: Jonathan Steinert diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 45655ff4..aff6d523 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.42"; +$VERSION = "2.43"; =head1 NAME From 046d04bacf7aeed576ca7f2c11bdea56aa77790f Mon Sep 17 00:00:00 2001 From: dormando Date: Thu, 25 Nov 2010 03:40:44 -0800 Subject: [PATCH 023/405] fix rebalance sdev -> ddev filter. --- CHANGES | 3 +++ lib/MogileFS/Rebalance.pm | 2 +- t/30-rebalance.t | 25 ++++++++++++++++++++++++- 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index bef61ed8..6b62752c 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,6 @@ + * Fix issue with rebalance not filtering source devs from the + destination list. + 2010-10-10: Release version 2.43 * Make over-replication fixable again. (sigh) diff --git a/lib/MogileFS/Rebalance.pm b/lib/MogileFS/Rebalance.pm index e8059c80..64edd8f7 100644 --- a/lib/MogileFS/Rebalance.pm +++ b/lib/MogileFS/Rebalance.pm @@ -429,7 +429,7 @@ sub filter_dest_devices { # FIXME: ends up not skipping stuff out of completed_devs? :/ my %sdevs = map { $_ => 1 } @{$state->{source_devs}}, @{$state->{completed_devs}}, $state->{sdev_current}; - my @devs = grep { ! $sdevs{$_} } @$devs; + my @devs = grep { ! $sdevs{$_->id} } @$devs; my @ddevs = (); for my $dev (@devs) { diff --git a/t/30-rebalance.t b/t/30-rebalance.t index d25daafd..990fa435 100644 --- a/t/30-rebalance.t +++ b/t/30-rebalance.t @@ -13,7 +13,7 @@ find_mogclient_or_skip(); my $sto = eval { temp_store(); }; if ($sto) { - plan tests => 40; + plan tests => 53; } else { plan skip_all => "Can't create temporary test database: $@"; exit 0; @@ -182,6 +182,14 @@ if ($@) { #print Dumper($saved_state), "\n"; #print Dumper($devfids2), "\n"; +# ensure all devices are still marked alive. 
+ok($tmptrack->mogadm("device", "mark", "hostA", 1, "alive"), "dev1 alive"); +ok($tmptrack->mogadm("device", "mark", "hostA", 2, "alive"), "dev2 alive"); +ok($tmptrack->mogadm("device", "mark", "hostB", 3, "alive"), "dev3 alive"); +ok($tmptrack->mogadm("device", "mark", "hostB", 4, "alive"), "dev4 alive"); +ok($tmptrack->mogadm("device", "mark", "hostC", 5, "alive"), "dev5 alive"); +ok($tmptrack->mogadm("device", "mark", "hostC", 6, "alive"), "dev6 alive"); + use MogileFS::Admin; my $moga = MogileFS::Admin->new( domain => "testdom", @@ -190,6 +198,21 @@ my $moga = MogileFS::Admin->new( ok(! defined $moga->rebalance_stop); my $res; + +# Quickly test the "no dupes" policy. +# ensures that source devices are properly filtered. +my $rebal_pol_dupes = "from_devices=1"; +ok($res = $moga->rebalance_set_policy($rebal_pol_dupes)); +if (! defined $res) { + print "Admin error: ", $moga->errstr, "\n"; +} +ok($res = $moga->rebalance_test); +{ + for my $dev (sort split /,/, $res->{ddevs}) { + ok($dev != 1); + } +} + ok($res = $moga->rebalance_set_policy($rebal_pol)); if (! defined $res) { print "Admin error: ", $moga->errstr, "\n"; From fce31b2b47b7873d65ef37ba53f0c72457b79811 Mon Sep 17 00:00:00 2001 From: Martijn Lina Date: Thu, 25 Nov 2010 03:45:17 -0800 Subject: [PATCH 024/405] Patch to make global limits work correctly State couldn't unserialize properly due to missing the default for the 'limit' state. --- CHANGES | 2 ++ lib/MogileFS/Rebalance.pm | 1 + 2 files changed, 3 insertions(+) diff --git a/CHANGES b/CHANGES index 6b62752c..45b5de6a 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,8 @@ * Fix issue with rebalance not filtering source devs from the destination list. + * Make global rebalance limits work (Martijn Lina) + 2010-10-10: Release version 2.43 * Make over-replication fixable again. 
(sigh) diff --git a/lib/MogileFS/Rebalance.pm b/lib/MogileFS/Rebalance.pm index 64edd8f7..39cbb70a 100644 --- a/lib/MogileFS/Rebalance.pm +++ b/lib/MogileFS/Rebalance.pm @@ -51,6 +51,7 @@ my %default_state = ( sdev_current => 0, sdev_lastfid => 0, sdev_limit => 0, + limit => 0, fids_queued => 0, bytes_queued => 0, time_started => 0, From 95362bd6b9c59b7a5dcd37cb83d49dc35e23be79 Mon Sep 17 00:00:00 2001 From: dormando Date: Thu, 25 Nov 2010 03:48:13 -0800 Subject: [PATCH 025/405] Checking in changes prior to tagging of version 2.44. Changelog diff is: diff --git a/CHANGES b/CHANGES index 45b5de6..5460b52 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,5 @@ +2010-11-25: Release version 2.44 + * Fix issue with rebalance not filtering source devs from the destination list. --- CHANGES | 2 ++ lib/MogileFS/Server.pm | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 45b5de6a..5460b522 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,5 @@ +2010-11-25: Release version 2.44 + * Fix issue with rebalance not filtering source devs from the destination list. 
diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index aff6d523..86af0dbf 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.43"; +$VERSION = "2.44"; =head1 NAME From 47f6ca566ac758e9964c5a925c211480a692484c Mon Sep 17 00:00:00 2001 From: Tomas Doran Date: Mon, 27 Sep 2010 13:02:03 +0100 Subject: [PATCH 026/405] Changed repository metadata --- Makefile.PL | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.PL b/Makefile.PL index e425c64b..8c283f71 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -43,7 +43,7 @@ WriteMakefile( resources => { homepage => 'http://danga.com/mogilefs/', bugtracker => 'http://rt.cpan.org/Public/Dist/Display.html?Name=mogilefs-server', - repository => 'http://code.sixapart.com/svn/mogilefs/', + repository => 'git://github.com/mogilefs/MogileFS-Server.git', MailingList => 'http://groups.google.com/group/mogile', }, From 653ad0541dff3054b450423c2a7959071626da3a Mon Sep 17 00:00:00 2001 From: dormando Date: Thu, 9 Dec 2010 21:40:01 -0800 Subject: [PATCH 027/405] some further editing of the info file. 
--- Makefile.PL | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.PL b/Makefile.PL index 8c283f71..cd5af502 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -41,8 +41,8 @@ WriteMakefile( 'MogPath', 'Mgd'], }, resources => { - homepage => 'http://danga.com/mogilefs/', - bugtracker => 'http://rt.cpan.org/Public/Dist/Display.html?Name=mogilefs-server', + homepage => 'http://www.mogilefs.org', + bugtracker => 'http://code.google.com/p/mogilefs/issues/list', repository => 'git://github.com/mogilefs/MogileFS-Server.git', MailingList => 'http://groups.google.com/group/mogile', }, From 499598cbfb8ca0654dbc1f2b5f93112b89fded65 Mon Sep 17 00:00:00 2001 From: Pyry Hakulinen Date: Thu, 21 Oct 2010 11:09:15 +0300 Subject: [PATCH 028/405] HTTPFile returns -1 if the file is missing from the device. Content-Length: 0 is valid and it should return BLEN not MISS. --- lib/MogileFS/Worker/Fsck.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index 13188c57..ea8974f4 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -296,7 +296,7 @@ sub fix_fid { # don't log in desperate mode, as we'd have "file missing!" log entries # for every device in the normal case, which is expected. unless ($is_desperate_mode) { - if (! $disk_size) { + if ($disk_size == -1) { $fid->fsck_log(EV_FILE_MISSING, $dev); } else { $fid->fsck_log(EV_BAD_LENGTH, $dev); From 9b11db93e994a04efe47ffe35155db15d246d76a Mon Sep 17 00:00:00 2001 From: Pyry Hakulinen Date: Mon, 25 Oct 2010 15:41:47 +0300 Subject: [PATCH 029/405] Documentation updates. 
--- lib/MogileFS/HTTPFile.pm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index 2b4f307b..fe727eac 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -93,7 +93,7 @@ sub delete { } # returns size of file, (doing a HEAD request and looking at content-length, or side-channel to mogstored) -# returns 0 on file missing (404 or -1 from sidechannel), +# returns -1 on file missing (404 or -1 from sidechannel), # returns undef on connectivity error use constant FILE_MISSING => -1; sub size { @@ -143,7 +143,7 @@ sub size { my $stream_response_timeout = 1.0; my $read_timed_out = 0; - # returns defined on a real answer (0 = file missing, >0 = file length), + # returns defined on a real answer (-1 = file missing, >=0 = file length), # returns undef on connectivity problems. my $parse_response = sub { # give the socket 1 second to become readable until we get @@ -171,7 +171,7 @@ sub size { return undef unless $line =~ /^(\S+)\s+(-?\d+)/; # expected format: "uri size" return undeferr("get_file_size() requested size of $path, got back size of $1 ($2 bytes)") if $1 ne $uri; - # backchannel sends back -1 on non-existent file, which we map to the defined value '0' + # backchannel sends back -1 on non-existent file, which we map to the defined value '-1' return FILE_MISSING if $2 < 0; # otherwise, return byte size of file return $2+0; From 75a623a356966971d687e84c6a5e0cf8096b400a Mon Sep 17 00:00:00 2001 From: Andre Bohr Date: Thu, 23 Dec 2010 22:32:08 -0800 Subject: [PATCH 030/405] debian packaging updates. 
--- conf/mogilefsd.conf | 5 +++- debian/changelog | 62 ++++++++++++++++++++++++++++++++++++++++ debian/control | 6 ++-- debian/mogilefsd.init | 12 ++++++-- debian/mogilefsd.install | 2 -- debian/mogilefsd.postrm | 2 +- debian/mogstored.init | 9 +++++- debian/mogstored.postrm | 2 +- 8 files changed, 89 insertions(+), 11 deletions(-) diff --git a/conf/mogilefsd.conf b/conf/mogilefsd.conf index c46a3f02..ad3ebb92 100644 --- a/conf/mogilefsd.conf +++ b/conf/mogilefsd.conf @@ -1,4 +1,7 @@ -#daemonize = 1 +# Enable daemon mode to work in background and use syslog +daemonize = 1 +# Where to store the pid of the daemon (must be the same in the init script) +pidfile = /var/run/mogilefsd/mogilefsd.pid # Database connection information db_dsn = DBI:mysql:mogilefs:host=127.0.0.1 db_user = username diff --git a/debian/changelog b/debian/changelog index 856f8813..818c074e 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,65 @@ +mogilefs-server (2.44-1) stable; urgency=low + + * pull from upstream + + -- Dormando Thu, 23 Dec 2010 21:58:49 -0800 + +mogilefs-server (2.43-1) stable; urgency=low + + * pulled 2.43 from github which includes fixes from 2.42-1 + * added pidfile entry to default mogilefsd.conf to fix pidfile creation bug + * added daemon = 1 to default mogilefsd.conf to make mogilefsd use syslog + + -- Andre Pascha Mon, 11 Oct 2010 10:21:27 +0200 + +mogilefs-server (2.42-1kwick2) stable; urgency=low + + * added fixes for over replicated files of todays github pull + * include mindevcount=1 fixes + + -- Andre Pascha Sun, 10 Oct 2010 14:34:25 +0200 + +mogilefs-server (2.42-1kwick1) stable; urgency=low + + * git pull of today + * includes fix from Andre Pascha to make FSCK work again + + -- Andre Pascha Sat, 09 Oct 2010 18:12:18 +0200 + +mogilefs-server (2.41-1kwick3) stable; urgency=low + + * fixed problem with wrong permissions on mogilefsd pid file + + -- Andre Pascha Fri, 08 Oct 2010 17:43:44 +0200 + +mogilefs-server (2.41-1kwick2) stable; urgency=low + + 
* added pid directory to mogstored/mogilefsd. fixes problem with stopping + the daemon via init skript when daemon is set to 1 in the config. Daemon + needs to be set to 1 or syslogging won't be enabled. + + -- Andre Pascha Fri, 08 Oct 2010 17:33:11 +0200 + +mogilefs-server (2.41-1kwick1) stable; urgency=low + + * moved to github version of MogileFS-Server + * package versioning now coresponds to mogilefs version + + -- Andre Pascha Fri, 08 Oct 2010 16:59:33 +0200 + +mogilefs-server (1.00-2kwick2) stable; urgency=low + + * added dependency to Danga::Socket for mogostored + + -- Andre Pascha Fri, 08 Oct 2010 15:29:46 +0200 + +mogilefs-server (1.00-2kwick1) stable; urgency=low + + * Non-maintainer upload. + * SVN snapshot of 06 Oct 2010 + + -- Andre Pascha Thu, 07 Oct 2010 17:26:01 +0200 + mogilefs-server (1.00-2) unstable; urgency=low * fix data loss bug when the 'tempfile' table is InnoDB and the diff --git a/debian/control b/debian/control index 7524611c..3e6bf33c 100644 --- a/debian/control +++ b/debian/control @@ -2,12 +2,12 @@ Source: mogilefs-server Section: perl Priority: optional Maintainer: Jonathan Steinert -Build-Depends-Indep: libstring-crc32-perl +Build-Depends-Indep: debhelper (>= 4.1.40), perl (>= 5.6.0-16), libstring-crc32-perl Standards-Version: 3.6.1.0 Package: mogstored Architecture: all -Depends: ${perl:Depends}, debhelper (>= 4.1.40), libperlbal-perl, libio-aio-perl, debconf (>= 1.2.0) +Depends: ${perl:Depends}, libperlbal-perl, libio-aio-perl, debconf (>= 1.2.0) Suggests: mogilefs-utils Description: storage node daemon for MogileFS Mogstored is a storage node daemon for MogileFS, the open-source @@ -15,7 +15,7 @@ Description: storage node daemon for MogileFS Package: mogilefsd Architecture: all -Depends: ${perl:Depends}, debhelper (>= 4.1.40), libdbd-mysql-perl, libdbi-perl, debconf (>= 1.2.0), libnet-netmask-perl, libwww-perl +Depends: ${perl:Depends}, libdbd-mysql-perl, libdbi-perl, debconf (>= 1.2.0), libnet-netmask-perl, libwww-perl, 
libdanga-socket-perl Suggests: mogilefs-utils Description: scalable distributed filesystem from Danga Interactive MogileFS is an open-source, application-level distributed filesystem. It diff --git a/debian/mogilefsd.init b/debian/mogilefsd.init index 75c13559..d7c3a9fb 100755 --- a/debian/mogilefsd.init +++ b/debian/mogilefsd.init @@ -15,7 +15,8 @@ DAEMON=/usr/bin/mogilefsd NAME=mogilefsd DESC=mogilefsd DEFAULTS=/etc/default/$NAME -PIDFILE=/var/run/$NAME.pid +PIDDIR=/var/run/$NAME +PIDFILE=$PIDDIR/$NAME.pid SCRIPTNAME=/etc/init.d/$NAME # Exit if the package is not installed @@ -47,6 +48,13 @@ set -e # do_start() { + + if ! test -d ${PIDDIR} + then + mkdir ${PIDDIR} + chown ${MOGILEFSD_RUNASUSER} ${PIDDIR} + fi + if [ -e $PIDFILE ] then @@ -61,7 +69,7 @@ do_start() fi - start-stop-daemon --start --quiet --exec $DAEMON --pidfile $PIDFILE -b -m --name $NAME --chuid $MOGILEFSD_RUNASUSER + start-stop-daemon --start --quiet --exec $DAEMON -b --name $NAME -- chuid $MOGILEFSD_RUNASUSER } # diff --git a/debian/mogilefsd.install b/debian/mogilefsd.install index 0cd0acf4..d06e00a5 100644 --- a/debian/mogilefsd.install +++ b/debian/mogilefsd.install @@ -3,5 +3,3 @@ usr/bin/mogilefsd /usr/bin usr/share/man/man1/mogilefsd.1p /usr/share/man/man1 usr/share/man/man3/MogileFS* /usr/share/man/man3 usr/share/perl5/MogileFS /usr/share/perl5 -usr/share/perl5/dev-mogstored.pl /usr/share/mogilefsd -usr/share/perl5/makedocs.pl /usr/share/mogilefsd diff --git a/debian/mogilefsd.postrm b/debian/mogilefsd.postrm index 33aa8910..ce101287 100644 --- a/debian/mogilefsd.postrm +++ b/debian/mogilefsd.postrm @@ -11,5 +11,5 @@ if [ "$1" = "purge" ] rmdir --ignore-fail-on-non-empty /etc/default fi -rm -f /var/run/mogilefsd.pid +rm -fr /var/run/mogilefsd diff --git a/debian/mogstored.init b/debian/mogstored.init index 682a125e..06c5ec76 100755 --- a/debian/mogstored.init +++ b/debian/mogstored.init @@ -15,7 +15,8 @@ DAEMON=/usr/bin/mogstored NAME=mogstored DESC=mogstored DEFAULTS=/etc/default/$NAME 
-PIDFILE=/var/run/$NAME.pid +PIDDIR=/var/run/$NAME +PIDFILE=$PIDDIR/$NAME.pid SCRIPTNAME=/etc/init.d/$NAME # Exit if the package is not installed @@ -47,6 +48,12 @@ set -e # do_start() { + if ! test -d ${PIDDIR} + then + mkdir ${PIDDIR} + chown ${MOGSTORED_RUNASUSER} ${PIDDIR} + fi + if [ -e $PIDFILE ] then diff --git a/debian/mogstored.postrm b/debian/mogstored.postrm index 4c9069ca..bb836fc6 100644 --- a/debian/mogstored.postrm +++ b/debian/mogstored.postrm @@ -19,6 +19,6 @@ fi #DEBHELPER# - rm -f /var/run/mogstored.pid + rm -fr /var/run/mogstored From 81035191000b4d1705c6394ae410043158640455 Mon Sep 17 00:00:00 2001 From: kad Date: Thu, 23 Dec 2010 22:34:29 -0800 Subject: [PATCH 031/405] additional debian packaging fixes --- debian/control | 2 +- debian/mogilefsd.init | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/debian/control b/debian/control index 3e6bf33c..d415603b 100644 --- a/debian/control +++ b/debian/control @@ -7,7 +7,7 @@ Standards-Version: 3.6.1.0 Package: mogstored Architecture: all -Depends: ${perl:Depends}, libperlbal-perl, libio-aio-perl, debconf (>= 1.2.0) +Depends: ${perl:Depends}, libperlbal-perl (>= 1.76), libio-aio-perl, debconf (>= 1.2.0) Suggests: mogilefs-utils Description: storage node daemon for MogileFS Mogstored is a storage node daemon for MogileFS, the open-source diff --git a/debian/mogilefsd.init b/debian/mogilefsd.init index d7c3a9fb..b17a70e0 100755 --- a/debian/mogilefsd.init +++ b/debian/mogilefsd.init @@ -69,7 +69,7 @@ do_start() fi - start-stop-daemon --start --quiet --exec $DAEMON -b --name $NAME -- chuid $MOGILEFSD_RUNASUSER + start-stop-daemon --start --quiet --exec $DAEMON -b --name $NAME --chuid $MOGILEFSD_RUNASUSER -- $MOGILEFSD_EXTRA_OPTS } # From 25cb9231ce919d832c25628d35683219584d02f5 Mon Sep 17 00:00:00 2001 From: Tomas Doran Date: Tue, 14 Dec 2010 04:33:15 +0000 Subject: [PATCH 032/405] Fix SQLite so tracker at least starts --- lib/MogileFS/Store.pm | 8 +++++--- lib/MogileFS/Store/SQLite.pm 
| 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index b05ec608..b084a673 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -124,6 +124,7 @@ sub grant_privileges { sub can_replace { 0 } sub can_insertignore { 0 } sub can_insert_multi { 0 } +sub can_for_update { 1 } sub unix_timestamp { die "No function in $_[0] to return DB's unixtime." } @@ -1506,15 +1507,16 @@ sub grab_queue_chunk { eval { $dbh->begin_work; my $ut = $self->unix_timestamp; - my $sth = $dbh->prepare(qq{ + my $query = qq{ SELECT $fields FROM $queue WHERE nexttry <= $ut $extwhere ORDER BY nexttry LIMIT $limit - FOR UPDATE - }); + }; + $query .= "FOR UPDATE\n" if $self->can_for_update; + my $sth = $dbh->prepare($query); $sth->execute; $work = $sth->fetchall_hashref('fid'); # Nothing to work on. diff --git a/lib/MogileFS/Store/SQLite.pm b/lib/MogileFS/Store/SQLite.pm index 259734e8..80673663 100644 --- a/lib/MogileFS/Store/SQLite.pm +++ b/lib/MogileFS/Store/SQLite.pm @@ -30,6 +30,7 @@ sub dsn_of_root { sub can_replace { 1 } sub can_insertignore { 0 } +sub can_for_update { 0 } sub unix_timestamp { "strftime('%s','now')" } # DBD::SQLite doesn't really have any table meta info methods From ea9e7987ca131434b9241de18971b7f2c7751366 Mon Sep 17 00:00:00 2001 From: Tomas Doran Date: Tue, 14 Dec 2010 05:04:11 +0000 Subject: [PATCH 033/405] Fix test db creation with SQLite --- lib/MogileFS/Store/SQLite.pm | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/MogileFS/Store/SQLite.pm b/lib/MogileFS/Store/SQLite.pm index 80673663..711cccb3 100644 --- a/lib/MogileFS/Store/SQLite.pm +++ b/lib/MogileFS/Store/SQLite.pm @@ -77,6 +77,7 @@ sub was_duplicate_error { sub new_temp { my ($fh, $filename) = File::Temp::tempfile(); + close($fh); system("$FindBin::Bin/../mogdbsetup", "--type=SQLite", "--yes", "--dbname=$filename") and die "Failed to run mogdbsetup ($FindBin::Bin/../mogdbsetup)."; From 8e6f4172f10cbbd31e77d6f98ad320956a508bca Mon Sep 
17 00:00:00 2001 From: Tomas Doran Date: Tue, 14 Dec 2010 05:57:37 +0000 Subject: [PATCH 034/405] Correct iostat command on darwin --- lib/Mogstored/ChildProcess/IOStat.pm | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/Mogstored/ChildProcess/IOStat.pm b/lib/Mogstored/ChildProcess/IOStat.pm index 15b6ec42..d2d2231c 100644 --- a/lib/Mogstored/ChildProcess/IOStat.pm +++ b/lib/Mogstored/ChildProcess/IOStat.pm @@ -4,6 +4,9 @@ use base 'Mogstored::ChildProcess'; my $docroot; +my $iostat_cmd = "iostat -dx 1 30"; +if ($^O =~ /darwin/) { $iostat_cmd =~ s/x// } + sub pre_exec_init { my $class = shift; @@ -48,7 +51,7 @@ sub run { my $get_iostat_fh = sub { while (1) { - if ($iostat_pid = open (my $fh, "iostat -dx 1 30|")) { + if ($iostat_pid = open (my $fh, "$iostat_cmd|")) { return $fh; } # TODO: try and find other paths to iostat From 5d5aea7005f85a57b0a0e0f6c68252d1ae1567e9 Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 24 Dec 2010 02:20:02 -0800 Subject: [PATCH 035/405] add a couple useful stats amount of work sent to workers (more visibility into tracker balance), and how many times queries have waited for query workers. --- lib/MogileFS/ProcManager.pm | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/MogileFS/ProcManager.pm b/lib/MogileFS/ProcManager.pm index 5768c639..1e8d7a50 100644 --- a/lib/MogileFS/ProcManager.pm +++ b/lib/MogileFS/ProcManager.pm @@ -524,6 +524,7 @@ sub process_worker_queues { # allow workers to grab a linear range of work. while (@$queue && $worker->wants_todo($job)) { $worker->write(":queue_todo $job " . shift(@$queue) . "\r\n"); + $Stats{'work_sent_to_' . $job}++; } next JOB unless @$queue; } @@ -566,6 +567,11 @@ sub ProcessQueues { # 123-455 10.2.3.123 get_paths foo=bar&blah=bar\r\n $worker->write("$worker->{pid}-$worker->{reqid} $clref->[1]\r\n"); } + + if (@PendingQueries) { + # Don't like the name. Feel free to change if you find better. 
+ $Stats{times_out_of_qworkers}++; + } } # send short descriptions of commands we support to the user From f279756d48b2da5ec2a2cb14b546965c902f69f5 Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 24 Dec 2010 14:45:45 -0800 Subject: [PATCH 036/405] job_master was never revalidating its dbh handle. --- lib/MogileFS/Worker/JobMaster.pm | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/MogileFS/Worker/JobMaster.pm b/lib/MogileFS/Worker/JobMaster.pm index 572dde27..3d3663d3 100644 --- a/lib/MogileFS/Worker/JobMaster.pm +++ b/lib/MogileFS/Worker/JobMaster.pm @@ -48,6 +48,7 @@ sub work { every(1, sub { # 'pings' parent and populates all queues. $self->send_to_parent("queue_depth all"); + $self->validate_dbh; my $sto = Mgd::get_store(); $self->read_from_parent(1); my $active = 0; From 8bdc38b6e6f4fb4ea44d4809b2083d21b8750dea Mon Sep 17 00:00:00 2001 From: Jason Mills Date: Tue, 14 Dec 2010 19:45:16 -0800 Subject: [PATCH 037/405] Implements tests for MogileFS::Store::ignore_replace --- t/store.t | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/t/store.t b/t/store.t index c4affe6a..fae65828 100644 --- a/t/store.t +++ b/t/store.t @@ -11,7 +11,7 @@ use MogileFS::Test; my $sto = eval { temp_store(); }; if ($sto) { - plan tests => 12; + plan tests => 14; } else { plan skip_all => "Can't create temporary test database: $@"; exit 0; @@ -63,3 +63,37 @@ is(scalar @on, 2, "FID 101 on 2 devices"); is($errc, "dup", "got a dup into tempfile") or die "Got error: $@\n"; } + +my $ignore_replace_match = { + base => { pattern => undef, dies => 1 }, + MySQL => { pattern => qr/INSERT IGNORE/, dies => 0 }, + SQLite => { pattern => qr/REPLACE/, dies => 0 }, + Postgres => { pattern => undef, dies => 1 }, +}; + +my $prx = eval { $sto->ignore_replace } || ''; +my $sto_driver = ( split( /::/, ref($sto) ) )[2] || 'base'; +my $match_spec = $ignore_replace_match->{ $sto_driver } + or die "Test not configured for '$sto_driver' storage driver"; 
+ + +ok( + ref( $match_spec->{pattern} ) eq 'Regexp'? + ( $prx =~ $match_spec->{pattern} ) : + ( !$prx ), + sprintf( + "ignore_replace %s return value for storage type '%s'", + ref( $match_spec->{pattern} ) eq 'Regexp'? + 'should' : 'should not', + $sto_driver + ) +) or diag "Got value: $prx"; + +ok( + $match_spec->{dies}? $@ : !$@, + sprintf( + "ignore_replace %s die for storage type '%s'", + $match_spec->{dies}? 'should' : 'should not', + $sto_driver + ) +) or diag "Got exception: $@"; From 9441503e0548a4be73e93fed349d0878503bfa74 Mon Sep 17 00:00:00 2001 From: Jason Mills Date: Tue, 14 Dec 2010 19:48:10 -0800 Subject: [PATCH 038/405] Implements tests for MogileFS::Store::retry_on_deadlock --- t/store.t | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 86 insertions(+), 1 deletion(-) diff --git a/t/store.t b/t/store.t index fae65828..77ec6c3a 100644 --- a/t/store.t +++ b/t/store.t @@ -11,7 +11,7 @@ use MogileFS::Test; my $sto = eval { temp_store(); }; if ($sto) { - plan tests => 14; + plan tests => 21; } else { plan skip_all => "Can't create temporary test database: $@"; exit 0; @@ -97,3 +97,88 @@ ok( $sto_driver ) ) or diag "Got exception: $@"; + +my $rv; + +# test retry_on_deadlock using good sql +$rv = eval { + $sto->retry_on_deadlock( sub { $sto->dbh->do("SELECT 1;"); } ); +}; +ok ( + $rv eq '1' || $rv eq '0E0', + "retry_on_deadlock return value for '$sto_driver': $rv" +) or diag "Got return value: $rv"; + +# test retry_on_deadlock using bad sql +$rv = eval { + $sto->retry_on_deadlock( sub { $sto->dbh->do("BADSQL;"); } ); +}; +ok ( + $@ =~ /BADSQL/, + "retry_on_deadlock got an exception on bad sql '$sto_driver'" +) or diag "Got exception value: $@"; + +# test retry_on_deadlock using a custom exception +$rv = eval { + $sto->retry_on_deadlock( sub { die "preempt"; } ); +}; +ok ( + $@ =~ /preempt/, + "retry_on_deadlock got a non-sql exception for '$sto_driver'" +) or diag $@; + +sub _do_induce_deadlock { + my @args = @_; + return eval 
{ + no strict 'refs'; + no warnings 'redefine'; + my $c = 0; + local *{ "MogileFS\::Store\::$sto_driver\::was_deadlock_error" } = sub { + return $c++ < 2; # unlock on third try + }; + $sto->retry_on_deadlock( @args ); + }; +} + +# attempt to induce a deadlock and check iterations +my $_v = 0; +$rv = _do_induce_deadlock( sub { return $_v++; } ); + +ok( + !$@, + "no exception on retry_on_deadlock while inducing a deadlock" +) or diag $@; + +ok( + $rv == 2, + 'retry_on_deadlock returned good iteration count while inducing a deadlock' +) or diag $rv; + +# induce a deadlock using badsql... should return an exemption +$rv = _do_induce_deadlock( sub { $sto->dbh->do("BADSQL;"); } ); +ok ( + !$rv && $@ =~ /BADSQL/, + "retry_on_deadlock got expected exemption inducing a deadlock with bad sql" +) or diag "Got value '$rv' with exemption: $@"; + +# induce a deadlock with good sql check sql return and iterations +$_v = 0; +$rv = _do_induce_deadlock( + sub { + return [ $sto->dbh->do("SELECT 1;"), $_v++ ]; + } +); +ok ( + ( !$@ && ref($rv) eq 'ARRAY' ) && ( + ( $rv->[0] eq '1' || $rv->[0] eq '0E0' ) && + $rv->[1] == 2 + ), + "retry_on_deadlock got proper return value and iteration while inducing a deadlock" +); + + + + + + + From 9d988d68bad4bc3b26811ae79236941d2d2c4af7 Mon Sep 17 00:00:00 2001 From: Jason Mills Date: Tue, 14 Dec 2010 19:54:55 -0800 Subject: [PATCH 039/405] Fixes MogileFS::Store::retry_on_deadlock to honor non-deadlock exceptions --- lib/MogileFS/Store.pm | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index b084a673..7365d07a 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -381,9 +381,7 @@ sub retry_on_deadlock { while ($tries-- > 0) { $rv = eval { $code->(); }; next if ($self->was_deadlock_error); - if ($@) { - croak($@) unless $self->dbh->err; - } + croak($@) if $@; last; } return $rv; From 043d39d4a8a4bf6a46d87fcc669fd4acf636509c Mon Sep 17 00:00:00 2001 From: "Robin H. 
Johnson" Date: Thu, 16 Dec 2010 00:16:43 +0000 Subject: [PATCH 040/405] Postgres enqueue_fids_* cleanup Implement new versions of enqueue_fids_to_delete and enqueue_fids_to_delete2 for Postgresql, that should avoid the large number of rollbacks from before. --- lib/MogileFS/Store/Postgres.pm | 55 ++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index 42de5d03..71e5f5f9 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -306,26 +306,69 @@ sub upgrade_add_file_to_queue_arg { # return 1 on success. die otherwise. sub enqueue_fids_to_delete { + # My kingdom for a real INSERT IGNORE implementation! my ($self, @fidids) = @_; my $sql = "INSERT INTO file_to_delete (fid) VALUES (?)"; - my $savepoint_name = "savepoint_enqueue_fids_to_delete"; - $self->dbh->begin_work; foreach my $fidid (@fidids) { - $self->dbh->do('SAVEPOINT '.$savepoint_name); + $self->dbh->begin_work; $self->condthrow; eval { $self->dbh->do($sql, undef, $fidid); }; if ($@ || $self->dbh->err) { if ($self->was_duplicate_error) { - $self->dbh->do('ROLLBACK TO '.$savepoint_name); + # Do nothing + } else { + $self->condthrow; } - $self->condthrow; } + $self->dbh->commit; + } + +} + +sub enqueue_fids_to_delete2 { + # My kingdom for a real REPLACE implementation! 
+ my ($self, @fidids) = @_; + my $tbl = 'file_to_delete2'; + my $sql1 = sprintf "INSERT INTO %s (fid, nexttry) VALUES (?,%s)", $tbl, $self->unix_timestamp; + my @dup_fids; + + foreach my $fidid (@fidids) { + $self->dbh->begin_work; + $self->condthrow; + eval { + $self->dbh->do($sql1, undef, $fidid); + }; + if ($@ || $self->dbh->err) { + if ($self->was_duplicate_error) { + push @dup_fids, $fidid; + } else { + $self->condthrow; + } + } + $self->dbh->commit; + } + + my $sql2 = sprintf 'UPDATE %s SET nexttry = %s WHERE fid IN (?)', $tbl, $self->unix_timestamp; + + foreach my $fidid (@dup_fids) { + $self->dbh->begin_work; + $self->condthrow; + eval { + $self->dbh->do($sql2, undef, $fidid); + }; + if ($@ || $self->dbh->err) { + if ($self->was_duplicate_error) { + # Ignore, no need of it + } else { + $self->condthrow; + } + } + $self->dbh->commit; } - $self->dbh->commit; } # -------------------------------------------------------------------------- From 7f0c04b2a6ee550196c06e4604bcfc3968ab4279 Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 31 Dec 2010 14:53:42 -0800 Subject: [PATCH 041/405] make list_fids work with gaps return hard count of keys each iteration instead of just what's "between" some set of numbers. --- lib/MogileFS/Store.pm | 6 +++--- lib/MogileFS/Worker/Query.pm | 10 ++++------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 7365d07a..f7fd773b 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -995,10 +995,10 @@ sub file_row_from_fidid { # return an arrayref of rows containing columns "fid, dmid, dkey, length, # classid, devcount" provided a pair of $fidid or undef if no rows. sub file_row_from_fidid_range { - my ($self, $fromfid, $tofid) = @_; + my ($self, $fromfid, $count) = @_; my $sth = $self->dbh->prepare("SELECT fid, dmid, dkey, length, classid, devcount ". - "FROM file WHERE fid BETWEEN ? AND ?"); - $sth->execute($fromfid,$tofid); + "FROM file WHERE fid > ? 
LIMIT ?"); + $sth->execute($fromfid,$count); return $sth->fetchall_arrayref({}); } diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index a8cc6f09..beed1e03 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -497,13 +497,11 @@ sub cmd_list_fids { # validate parameters my $fromfid = ($args->{from} || 0)+0; - my $tofid = ($args->{to} || 0)+0; - $tofid ||= ($fromfid + 100); - $tofid = ($fromfid + 100) - if $tofid > $fromfid + 100 || - $tofid < $fromfid; + my $count = ($args->{to} || 0)+0; + $count ||= 100; + $count = 500 if $count > 500 || $count < 0; - my $rows = Mgd::get_store()->file_row_from_fidid_range($fromfid, $tofid); + my $rows = Mgd::get_store()->file_row_from_fidid_range($fromfid, $count); return $self->err_line('failure') unless $rows; return $self->ok_line({ fid_count => 0 }) unless @$rows; From d1a18605249b2d7ea52d88f77065c36a5e0858ab Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 31 Dec 2010 15:49:19 -0800 Subject: [PATCH 042/405] only dbh->ping if not used in over a minute avoid unnecessary roundtrips to the database, which we were previously doing once per command. --- lib/MogileFS/Store.pm | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index f7fd773b..0da1dd7d 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -52,6 +52,8 @@ sub new_from_dsn_user_pass { slave_list_cache => [], recheck_req_gen => 0, # incremented generation, of recheck of dbh being requested recheck_done_gen => 0, # once recheck is done, copy of what the request generation was + recheck_after => 60, # ->ping at most once per minute. + last_used => 0, # how stale is the handle? handles_left => 0, # amount of times this handle can still be verified server_setting_cache => {}, # value-agnostic db setting cache. 
}, $subclass; @@ -264,16 +266,22 @@ sub recheck_dbh { sub dbh { my $self = shift; + my $now = time(); if ($self->{dbh}) { - if ($self->{recheck_done_gen} != $self->{recheck_req_gen}) { + if ($self->{last_used} < $now - $self->{recheck_after}) { $self->{dbh} = undef unless $self->{dbh}->ping; + } + if ($self->{recheck_done_gen} != $self->{recheck_req_gen}) { # Handles a memory leak under Solaris/Postgres. $self->{dbh} = undef if ($self->{max_handles} && $self->{handles_left}-- < 0); $self->{recheck_done_gen} = $self->{recheck_req_gen}; } - return $self->{dbh} if $self->{dbh}; + if ($self->{dbh}) { + $self->{last_used} = $now; + return $self->{dbh}; + } } $self->{dbh} = DBI->connect($self->{dsn}, $self->{user}, $self->{pass}, { @@ -285,6 +293,7 @@ sub dbh { die "Failed to connect to database: " . DBI->errstr; $self->post_dbi_connect; $self->{handles_left} = $self->{max_handles} if $self->{max_handles}; + $self->{last_used} = $now; return $self->{dbh}; } From af1baa9da2531369bc92a9d73bbe5caa138f963d Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 31 Dec 2010 16:00:06 -0800 Subject: [PATCH 043/405] validate database handles in the right places goes along with the "don't ping too much" update. 
--- lib/MogileFS/Worker/Fsck.pm | 2 ++ lib/MogileFS/Worker/Replicate.pm | 7 ++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index ea8974f4..dc9f4619 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -89,6 +89,8 @@ sub work { my $queue_todo = $self->queue_todo('fsck'); # This counts the same as a $self->still_alive; $self->send_to_parent('worker_bored 50 fsck'); + return unless @{$queue_todo}; + $self->validate_dbh; my @fids = (); while (my $todo = shift @{$queue_todo}) { diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index db134f01..7ea0e993 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -45,9 +45,6 @@ sub work { return; } - $self->validate_dbh; - my $dbh = $self->get_dbh or return 0; - my $sto = Mgd::get_store(); $self->send_to_parent("worker_bored 100 replicate rebalance"); my $queue_todo = $self->queue_todo('replicate'); @@ -56,6 +53,10 @@ sub work { return; } + $self->validate_dbh; + my $dbh = $self->get_dbh or return 0; + my $sto = Mgd::get_store(); + while (my $todo = shift @$queue_todo) { my $fid = $todo->{fid}; $self->replicate_using_torepl_table($todo); From e1cafe9872b214869b460bddb1af0efa5258ccd9 Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 3 Jan 2011 17:03:12 -0800 Subject: [PATCH 044/405] don't allow clients to upload to the wrong dev create_open would give you a list of devices to upload to, but nothing was checking the corresponding create_close. 
--- lib/MogileFS/Store.pm | 2 +- lib/MogileFS/Worker/Query.pm | 45 ++++++++++++++++++++++++------------ t/00-startup.t | 26 ++++++++++++++++++++- 3 files changed, 56 insertions(+), 17 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 0da1dd7d..801d2d2e 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1034,7 +1034,7 @@ sub fid_devids_multiple { # return hashref of columns classid, dmid, dkey, given a $fidid, or return undef sub tempfile_row_from_fid { my ($self, $fidid) = @_; - return $self->dbh->selectrow_hashref("SELECT classid, dmid, dkey ". + return $self->dbh->selectrow_hashref("SELECT classid, dmid, dkey, devids ". "FROM tempfile WHERE fid=?", undef, $fidid); } diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index beed1e03..b07d8c59 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -368,26 +368,27 @@ sub cmd_create_close { # find the temp file we're closing and making real. If another worker # already has it, bail out---the client closed it twice. + # this is racy, but the only expected use case is a client retrying. + # should still be fixed better once more scalable locking is available. my $trow = $sto->delete_and_return_tempfile_row($fidid) or return $self->err_line("no_temp_file"); - # if a temp file is closed without a provided-key, that means to - # delete it. - unless (defined $key && length($key)) { + # Protect against leaving orphaned uploads. + my $failed = sub { $dfid->add_to_db; $fid->delete; - return $self->ok_line; - } + }; - # see if we have a fid for this key already - my $old_fid = MogileFS::FID->new_from_dmid_and_key($dmid, $key); - if ($old_fid) { - # Fail if a file already exists for this fid. Should never - # happen, as it should not be possible to close a file twice. 
- return $self->err_line("fid_exists") - unless $old_fid->{fidid} != $fidid; + unless ($trow->{devids} =~ m/\b$devid\b/) { + $failed->(); + return $self->err_line("invalid_destdev", "File uploaded to invalid dest $devid. Valid devices were: " . $trow->{devids}); + } - $old_fid->delete; + # if a temp file is closed without a provided-key, that means to + # delete it. + unless (defined $key && length($key)) { + $failed->(); + return $self->ok_line; } # get size of file and verify that it matches what we were given, if anything @@ -399,11 +400,25 @@ sub cmd_create_close { # storage node is unreachable or the file is missing my $type = defined $size ? "missing" : "cantreach"; my $lasterr = MogileFS::Util::last_error(); + $failed->(); return $self->err_line("size_verify_error", "Expected: $args->{size}; actual: 0 ($type); path: $path; error: $lasterr") } - return $self->err_line("size_mismatch", "Expected: $args->{size}; actual: $size; path: $path") - if $args->{size} > -1 && ($args->{size} != $size); + if ($args->{size} > -1 && ($args->{size} != $size)) { + $failed->(); + return $self->err_line("size_mismatch", "Expected: $args->{size}; actual: $size; path: $path") + } + + # see if we have a fid for this key already + my $old_fid = MogileFS::FID->new_from_dmid_and_key($dmid, $key); + if ($old_fid) { + # Fail if a file already exists for this fid. Should never + # happen, as it should not be possible to close a file twice. + return $self->err_line("fid_exists") + unless $old_fid->{fidid} != $fidid; + + $old_fid->delete; + } # TODO: check for EIO? 
diff --git a/t/00-startup.t b/t/00-startup.t index 2942157d..eac15a65 100644 --- a/t/00-startup.t +++ b/t/00-startup.t @@ -20,7 +20,7 @@ find_mogclient_or_skip(); my $sto = eval { temp_store(); }; if ($sto) { - plan tests => 70; + plan tests => 72; } else { plan skip_all => "Can't create temporary test database: $@"; exit 0; @@ -318,6 +318,30 @@ foreach my $t (qw(file file_on file_to_delete)) { }), "table $t is empty"); } +# Test some broken client modes. +{ + my $c = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001', + Timeout => 3); + die "Failed to connect to test tracker" unless $c; + # Pretend to upload a file, then tell the server weird things. + # Not trying to be defensable to all sorts of things, but ensuring we're + # safe against double close, bad destdev, etc. + print $c "create_open " + . "domain=testdom&fid=0&class=&multi_dest=1&key=fufufu\n"; + my $res = <$c>; + my $fidid; + ok($res =~ m/fid=(\d+)/, "bare create_open worked"); + $fidid = $1; + # Pretend we uploaded something. + print $c "create_close " + . "domain=testdom&fid=$fidid&devid=4&size=0&key=fufufu" + . "&path=http://127.0.1.2:7500/dev4/0/000/000/0000000$fidid.fid\n"; + my $res2 = <$c>; + ok($res2 =~ m/invalid_destdev/, "cannot upload to unlisted destdev"); + + # TODO: test double closing, etc. +} + sub try_for { my ($tries, $code) = @_; for (1..$tries) { From 835e53f80c814e600adcd3c260fa55991d2a3d37 Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 3 Jan 2011 18:07:22 -0800 Subject: [PATCH 045/405] add "file_info" command for fetching metadata. 
--- lib/MogileFS/Worker/Query.pm | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index b07d8c59..28b9d156 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -506,6 +506,39 @@ sub cmd_delete { return $self->ok_line; } +sub cmd_file_info { + my MogileFS::Worker::Query $self = shift; + my $args = shift; + + $args->{dmid} = $self->check_domain($args) + or return $self->err_line('domain_not_found'); + + # validate parameters + my $dmid = $args->{dmid}; + my $key = $args->{key} or return $self->err_line("no_key"); + + my $fid; + Mgd::get_store()->slaves_ok(sub { + $fid = MogileFS::FID->new_from_dmid_and_key($dmid, $key); + }); + $fid or return $self->err_line("unknown_key"); + + my $ret = {}; + $ret->{fid} = $fid->id; + $ret->{domain} = MogileFS::Domain->name_of_id($fid->dmid); + $ret->{class} = MogileFS::Class->class_name($fid->dmid, $fid->classid); + $ret->{key} = $key; + $ret->{'length'} = $fid->length; + $ret->{devcount} = $fid->devcount; + # Only if requested, also return the raw devids. + # Caller should use get_paths if they intend to fetch the file. + if ($args->{devices}) { + $ret->{devids} = join(',', $fid->devids); + } + + return $self->ok_line($ret); +} + sub cmd_list_fids { my MogileFS::Worker::Query $self = shift; my $args = shift; From 4b9035c0b5c333b3919dbc01a3bcbe5024f6ad3e Mon Sep 17 00:00:00 2001 From: dormando Date: Sat, 8 Jan 2011 00:13:55 -0800 Subject: [PATCH 046/405] file_debug command for use with the "mogfiledebug" utility. finds as much about a fid as it can. 
--- lib/MogileFS/Store.pm | 18 ++++++++++ lib/MogileFS/Worker/Query.pm | 64 ++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 801d2d2e..0a709380 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1587,6 +1587,24 @@ sub note_done_replicating { my ($self, $fidid) = @_; } +sub find_fid_from_file_to_replicate { + my ($self, $fidid) = @_; + return $self->dbh->selectrow_hashref("SELECT fid, nexttry, fromdevid, failcount, flags FROM file_to_replicate WHERE fid = ?", + undef, $fidid); +} + +sub find_fid_from_file_to_delete2 { + my ($self, $fidid) = @_; + return $self->dbh->selectrow_hashref("SELECT fid, nexttry, failcount FROM file_to_delete2 WHERE fid = ?", + undef, $fidid); +} + +sub find_fid_from_file_to_queue { + my ($self, $fidid, $type) = @_; + return $self->dbh->selectrow_hashref("SELECT fid, devid, type, nexttry, failcount, flags, arg FROM file_to_queue WHERE fid = ? AND type = ?", + undef, $fidid, $type); +} + sub delete_fid_from_file_to_replicate { my ($self, $fidid) = @_; $self->retry_on_deadlock(sub { diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 28b9d156..c76584a6 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -10,6 +10,7 @@ use MogileFS::Util qw(error error_code first weighted_list device_state eurl decode_url_args); use MogileFS::HTTPFile; use MogileFS::Rebalance; +use MogileFS::Config; sub new { my ($class, $psock) = @_; @@ -506,6 +507,69 @@ sub cmd_delete { return $self->ok_line; } +# Takes either domain/dkey or fid and tries to return as much as possible. +sub cmd_file_debug { + my MogileFS::Worker::Query $self = shift; + my $args = shift; + # Talk to the master since this is "debug mode" + my $sto = Mgd::get_store(); + my $ret = {}; + + # If a FID is provided, just use that. 
+ my $fid; + my $fidid; + if ($args->{fid}) { + $fidid = $args->{fid}+0; + # It's not fatal if we don't find the row here. + $fid = $sto->file_row_from_fidid($args->{fid}+0); + } else { + # If not, require dmid/dkey and pick up the fid from there. + $args->{dmid} = $self->check_domain($args) + or return $self->err_line('domain_not_found'); + return $self->err_line("no_key") unless $args->{key}; + $fid = $sto->file_row_from_dmid_key($args->{dmid}, $args->{key}); + return $self->err_line("unknown_key") unless $fid; + $fidid = $fid->{fid}; + } + + if ($fid) { + $fid->{domain} = MogileFS::Domain->name_of_id($fid->{dmid}); + $fid->{class} = MogileFS::Class->class_name($fid->{dmid}, + $fid->{classid}); + } + + # Fetch all of the queue data. + my $tfile = $sto->tempfile_row_from_fid($fidid); + my $repl = $sto->find_fid_from_file_to_replicate($fidid); + my $del = $sto->find_fid_from_file_to_delete2($fidid); + my $reb = $sto->find_fid_from_file_to_queue($fidid, REBAL_QUEUE); + my $fsck = $sto->find_fid_from_file_to_queue($fidid, FSCK_QUEUE); + + # Fetch file_on rows, and turn into paths. + my @devids = $sto->fid_devids($fidid); + for my $devid (@devids) { + # Won't matter if we can't make the path (dev is dead/deleted/etc) + eval { + my $dfid = MogileFS::DevFID->new($devid, $fidid); + my $path = $dfid->get_url; + $ret->{'devpath_' . $devid} = $path; + }; + } + $ret->{devids} = join(',', @devids) if @devids; + + # Return file row (if found) and all other data. + my %toret = (fid => $fid, tempfile => $tfile, replqueue => $repl, + delqueue => $del, rebqueue => $reb, fsckqueue => $fsck); + while (my ($key, $hash) = each %toret) { + while (my ($name, $val) = each %$hash) { + $ret->{$key . '_' . 
$name} = $val; + } + } + + return $self->err_line("unknown_fid") unless keys %$ret; + return $self->ok_line($ret); +} + sub cmd_file_info { my MogileFS::Worker::Query $self = shift; my $args = shift; From 1b7bc1594e2bb59414ed37ed85702c9230d107b0 Mon Sep 17 00:00:00 2001 From: dormando Date: Sat, 8 Jan 2011 00:43:29 -0800 Subject: [PATCH 047/405] Checking in changes prior to tagging of version 2.45. Changelog diff is: diff --git a/CHANGES b/CHANGES index 5460b52..edbf417 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,47 @@ +2011-01-08: Release version 2.45 + + * file_debug command (dormando ) + + * add "file_info" command for fetching metadata. (dormando ) + + * don't allow clients to upload to the wrong dev (dormando ) + + * validate database handles in the right places (dormando ) + + * only dbh->ping if not used in over a minute (dormando ) + + * make list_fids work with gaps (dormando ) + + * Postgres enqueue_fids_* cleanup (Robin H. Johnson ) + + * Fixes MogileFS::Store::retry_on_deadlock to honor non-deadlock exceptions (Jason Mills ) + + * Implements tests for MogileFS::Store::retry_on_deadlock (Jason Mills ) + + * Implements tests for MogileFS::Store::ignore_replace (Jason Mills ) + + * job_master was never revalidating its dbh handle. (dormando ) + + * add a couple useful stats (dormando ) + + * Correct iostat command on darwin (Tomas Doran ) + + * Fix test db creation with SQLite (Tomas Doran ) + + * Fix SQLite so tracker at least starts (Tomas Doran ) + + * additional debian packaging fixes (kad ) + + * debian packaging updates. (Andre Bohr ) + + * Documentation updates. (Pyry Hakulinen ) + + * HTTPFile returns -1 if the file is missing from the device. Content-Length: 0 is valid and it should return BLEN not MISS. (Pyry Hakulinen ) + + * some further editing of the info file. 
(dormando ) + + * Changed repository metadata (Tomas Doran ) + 2010-11-25: Release version 2.44 * Fix issue with rebalance not filtering source devs from the --- CHANGES | 44 ++++++++++++++++++++++++++++++++++++++++++ MogileFS-Server.spec | 2 +- lib/MogileFS/Server.pm | 2 +- 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index 5460b522..edbf4171 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,47 @@ +2011-01-08: Release version 2.45 + + * file_debug command (dormando ) + + * add "file_info" command for fetching metadata. (dormando ) + + * don't allow clients to upload to the wrong dev (dormando ) + + * validate database handles in the right places (dormando ) + + * only dbh->ping if not used in over a minute (dormando ) + + * make list_fids work with gaps (dormando ) + + * Postgres enqueue_fids_* cleanup (Robin H. Johnson ) + + * Fixes MogileFS::Store::retry_on_deadlock to honor non-deadlock exceptions (Jason Mills ) + + * Implements tests for MogileFS::Store::retry_on_deadlock (Jason Mills ) + + * Implements tests for MogileFS::Store::ignore_replace (Jason Mills ) + + * job_master was never revalidating its dbh handle. (dormando ) + + * add a couple useful stats (dormando ) + + * Correct iostat command on darwin (Tomas Doran ) + + * Fix test db creation with SQLite (Tomas Doran ) + + * Fix SQLite so tracker at least starts (Tomas Doran ) + + * additional debian packaging fixes (kad ) + + * debian packaging updates. (Andre Bohr ) + + * Documentation updates. (Pyry Hakulinen ) + + * HTTPFile returns -1 if the file is missing from the device. Content-Length: 0 is valid and it should return BLEN not MISS. (Pyry Hakulinen ) + + * some further editing of the info file. 
(dormando ) + + * Changed repository metadata (Tomas Doran ) + 2010-11-25: Release version 2.44 * Fix issue with rebalance not filtering source devs from the diff --git a/MogileFS-Server.spec b/MogileFS-Server.spec index 866abb08..79a72eb3 100644 --- a/MogileFS-Server.spec +++ b/MogileFS-Server.spec @@ -2,7 +2,7 @@ name: MogileFS-Server summary: MogileFS-Server - MogileFS Server daemons and utilities. -version: 2.43 +version: 2.45 release: 2%{?dist} vendor: Alan Kasindorf packager: Jonathan Steinert diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 86af0dbf..4ef3705b 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.44"; +$VERSION = "2.45"; =head1 NAME From cf910270f3fcdde7529a0ad84166eb1655046c55 Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 10 Jan 2011 12:44:40 -0800 Subject: [PATCH 048/405] missing $fidid for source_down error message --- lib/MogileFS/Worker/Replicate.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index 7ea0e993..4f6fad6c 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -396,7 +396,7 @@ sub replicate { # if they requested a specific source, that source must be up. if ($sdevid && ! grep { $_ == $sdevid} @on_up_devid) { - return $retunlock->(0, "source_down", "Requested replication source device $sdevid not available"); + return $retunlock->(0, "source_down", "Requested replication source device $sdevid not available for $fidid"); } my %dest_failed; # devid -> 1 for each devid we were asked to copy to, but failed. From 7ed6860baadd3cb876d0087522df9891fe021ff0 Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 10 Jan 2011 18:54:35 -0800 Subject: [PATCH 049/405] Example application. 
--- MANIFEST | 3 ++ examples/testapp/README | 3 ++ examples/testapp/testapp-perlbal.conf | 24 +++++++++ examples/testapp/testapp.psgi | 71 +++++++++++++++++++++++++++ 4 files changed, 101 insertions(+) create mode 100644 examples/testapp/README create mode 100644 examples/testapp/testapp-perlbal.conf create mode 100644 examples/testapp/testapp.psgi diff --git a/MANIFEST b/MANIFEST index f290d4bb..34f17819 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,4 +1,7 @@ CHANGES +examples/testapp/testapp-perlbal.conf +examples/testapp/testapp.psgi +examples/testapp/README doc/fsck-notes.txt doc/memcache-support.txt doc/pluggable-replication-policies.txt diff --git a/examples/testapp/README b/examples/testapp/README new file mode 100644 index 00000000..0a5b3162 --- /dev/null +++ b/examples/testapp/README @@ -0,0 +1,3 @@ +For a walkthrough on starting this example application, see the wiki: + +http://code.google.com/p/mogilefs/wiki/AppExample diff --git a/examples/testapp/testapp-perlbal.conf b/examples/testapp/testapp-perlbal.conf new file mode 100644 index 00000000..d585ba84 --- /dev/null +++ b/examples/testapp/testapp-perlbal.conf @@ -0,0 +1,24 @@ +SERVER max_connections = 100 +SERVER pidfile = perlbal.pid + +XS enable headers + +CREATE POOL testapp_pool + POOL testapp_pool ADD 127.0.0.1:5000 +CREATE SERVICE testapp + SET role = reverse_proxy + SET pool = testapp_pool + SET listen = 0.0.0.0:7070 + SET enable_reproxy = on + SET persist_backend = on + SET backend_persist_cache = 30 + SET connect_ahead = 1 + SET verify_backend = on + SET persist_client = off +ENABLE testapp + +CREATE SERVICE mgmt + SET role = management + SET listen = 127.0.0.1:7071 +ENABLE mgmt + diff --git a/examples/testapp/testapp.psgi b/examples/testapp/testapp.psgi new file mode 100644 index 00000000..8440228a --- /dev/null +++ b/examples/testapp/testapp.psgi @@ -0,0 +1,71 @@ +# It's just a little bit of perl, so hang tight if you're a php/etc user :) +use warnings; +use strict; + +# Import the MogileFS 
client and a helper util from Plack. +use Plack::Request; +use MogileFS::Client; + +my $TRACKERS = ['tracker1:7001']; +my $DOMAIN = 'toast'; + +# Initialize the client when the server starts. +# You could also do this in the middle of the request. +my $mogc = MogileFS::Client->new(domain => $DOMAIN, + hosts => $TRACKERS); + +sub run { + # Request object for reading paths/cookies/etc. + my $req = shift; + + # Only support GET requests for this example. + # Nothing stops us from supporting HEAD requests, though. + if ($req->method ne 'GET') { + return [ 403, [ 'Content-Type' => 'text/plain' ], + [ 'Only GET methods allowed' ] ]; + } + + # Pull out the GET /whatever path. + my $file = $req->path_info; + + # At this stage you would do some validation, or query your own + # application database for what the MogileFS path actually is. In this + # example we just ensure there is a limited set of characters used. + unless ($file =~ m/^[A-Z0-9.\/\\]+$/gmi) { + return [ 404, [ 'Content-Type' => 'text/plain' ], + [ 'Invalid request format received!' ] ]; + } + + # Ask the MogileFS tracker for the paths to this file. + # At this point you could check memcached for cached paths as well, and + # cache if none were found. + my @paths = $mogc->get_paths($file); + + # If MogileFS returns no paths, the file is likely missing or never + # existed. + unless (@paths) { + return [ 404, [ 'Content-Type' => 'text/plain' ], + [ 'File not found: ' . $file ] ]; + } + + # Now we create the magic Perlbal header, "X-REPROXY-URL". This header + # tells Perlbal to go fetch and return the file from where MogileFS has + # said it is. + # At this point you would add any other headers. If it's a jpeg, you would + # ship the proper 'image/jpeg' Content-Type. In this example we blanket + # serve everything as text/plain. + my $headers = [ 'Content-Type' => 'text/plain', + 'X-REPROXY-URL' => join(' ', @paths) ]; + + # Return a 200 OK, the headers, and no body. 
The body will be filled in + # with what Perlbal fetches. + return [ 200, $headers, [ ] ]; +} + +# Some simple Plack glue, you can ignore this. +# For a real app you should use a full framework. ;) +my $app = sub { + my $env = shift; + my $req = Plack::Request->new($env); + return run($req); +}; From fb13c720de6db47f9359f676d88d167f117a2252 Mon Sep 17 00:00:00 2001 From: dormando Date: Thu, 13 Jan 2011 18:17:16 -0800 Subject: [PATCH 050/405] Revert "only dbh->ping if not used in over a minute" This reverts commit d1a18605249b2d7ea52d88f77065c36a5e0858ab. I misunderstood some error handling code, and nothing internally retries if it gets back that the server has gone away. Which means failing over a DB VIP or network issues could hose mogile until restarted. So we roll this back, sorry :( --- lib/MogileFS/Store.pm | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 0a709380..70838a14 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -52,8 +52,6 @@ sub new_from_dsn_user_pass { slave_list_cache => [], recheck_req_gen => 0, # incremented generation, of recheck of dbh being requested recheck_done_gen => 0, # once recheck is done, copy of what the request generation was - recheck_after => 60, # ->ping at most once per minute. - last_used => 0, # how stale is the handle? handles_left => 0, # amount of times this handle can still be verified server_setting_cache => {}, # value-agnostic db setting cache. }, $subclass; @@ -266,22 +264,16 @@ sub recheck_dbh { sub dbh { my $self = shift; - my $now = time(); if ($self->{dbh}) { - if ($self->{last_used} < $now - $self->{recheck_after}) { - $self->{dbh} = undef unless $self->{dbh}->ping; - } if ($self->{recheck_done_gen} != $self->{recheck_req_gen}) { + $self->{dbh} = undef unless $self->{dbh}->ping; # Handles a memory leak under Solaris/Postgres.
$self->{dbh} = undef if ($self->{max_handles} && $self->{handles_left}-- < 0); $self->{recheck_done_gen} = $self->{recheck_req_gen}; } - if ($self->{dbh}) { - $self->{last_used} = $now; - return $self->{dbh}; - } + return $self->{dbh} if $self->{dbh}; } $self->{dbh} = DBI->connect($self->{dsn}, $self->{user}, $self->{pass}, { @@ -293,7 +285,6 @@ sub dbh { die "Failed to connect to database: " . DBI->errstr; $self->post_dbi_connect; $self->{handles_left} = $self->{max_handles} if $self->{max_handles}; - $self->{last_used} = $now; return $self->{dbh}; } From 8aba6e369c04ab8ec746f462befb65cfad347142 Mon Sep 17 00:00:00 2001 From: "Robin H. Johnson" Date: Thu, 13 Jan 2011 18:19:37 -0800 Subject: [PATCH 051/405] Fix job_master crash under postgres Ensure enqueue_for_todo is always used inside enqueue_many_for_todo for less capable RDBMS. The if branch in enqueue_many_for_todo is meant for all calls when the RDBMS cannot handle the fast variants from MySQL, not just when there is more than one item, because that single item will invariably fail. This was previously obscured by retry_on_deadlock fixed in the previous release. --- lib/MogileFS/Store.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 70838a14..a1d18836 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1296,7 +1296,7 @@ sub enqueue_for_todo { # return 1 on success. die otherwise. sub enqueue_many_for_todo { my ($self, $fidids, $type, $in) = @_; - if (@$fidids > 1 && ! ($self->can_insert_multi && ($self->can_replace || $self->can_insertignore))) { + if (! ($self->can_insert_multi && ($self->can_replace || $self->can_insertignore))) { $self->enqueue_for_todo($_, $type, $in) foreach @$fidids; return 1; } From 835e53f80c814e600adcd3c260fa55991d2a3d37 Mon Sep 17 00:00:00 2001 From: dormando Date: Thu, 13 Jan 2011 18:25:04 -0800 Subject: [PATCH 052/405] Checking in changes prior to tagging of version 2.46.
Changelog diff is: diff --git a/CHANGES b/CHANGES index edbf417..2b6a371 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,14 @@ +2011-01-13: Release version 2.46 + + * Fix job_master crash under postgres (Robin H. Johnson ) + + * Revert "only dbh->ping if not used in over a minute" (dormando ) + *IMPORTANT FIX* if you're using 2.45, you should upgrade to this. + + * Example application. (dormando ) + + * missing $fidid for source_down error message (dormando ) + 2011-01-08: Release version 2.45 * file_debug command (dormando ) --- CHANGES | 11 +++++++++++ MogileFS-Server.spec | 2 +- lib/MogileFS/Server.pm | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index edbf4171..2b6a3711 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,14 @@ +2011-01-13: Release version 2.46 + + * Fix job_master crash under postgres (Robin H. Johnson ) + + * Revert "only dbh->ping if not used in over a minute" (dormando ) + *IMPORTANT FIX* if you're using 2.45, you should upgrade to this. + + * Example application. (dormando ) + + * missing $fidid for source_down error message (dormando ) + 2011-01-08: Release version 2.45 * file_debug command (dormando ) diff --git a/MogileFS-Server.spec b/MogileFS-Server.spec index 79a72eb3..40eea05c 100644 --- a/MogileFS-Server.spec +++ b/MogileFS-Server.spec @@ -2,7 +2,7 @@ name: MogileFS-Server summary: MogileFS-Server - MogileFS Server daemons and utilities. -version: 2.45 +version: 2.46 release: 2%{?dist} vendor: Alan Kasindorf packager: Jonathan Steinert diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 4ef3705b..60b43403 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.45"; +$VERSION = "2.46"; =head1 NAME From 7dbfb44d4f443bc5a9c30d772f3842f564efb714 Mon Sep 17 00:00:00 2001 From: "Robin H. 
Johnson" Date: Thu, 10 Feb 2011 12:20:42 -0800 Subject: [PATCH 053/405] Make postgres use new delete worker code Back in 5b71d709e the MySQL delete_fidid was converted to use the enqueue_for_delete2 code, but the Postgres version was never changed. --- lib/MogileFS/Store/Postgres.pm | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index 71e5f5f9..9f27b067 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -717,10 +717,7 @@ sub delete_fidid { $self->condthrow; $self->dbh->do("DELETE FROM tempfile WHERE fid=?", undef, $fidid); $self->condthrow; - $self->insert_or_ignore( - insert => "INSERT INTO file_to_delete (fid) VALUES (?)", - insert_vals => [ $fidid ], - ); + $self->enqueue_for_delete2($fidid, 0); $self->condthrow; } From 40f1c550ea52362374c384382440ac991de26e1b Mon Sep 17 00:00:00 2001 From: dormando Date: Thu, 20 Jan 2011 00:48:00 -0800 Subject: [PATCH 054/405] new cachable objects + factories device/host aren't finished yet: - need save to db and friends - more tests - some missing interfaces MogFactory needs to be renamed to MogileFS::Factory and the other factories moved underneath. Most tests pass as of this commit. 
--- lib/MogileFS/Factory.pm | 96 +++++++++++ lib/MogileFS/Factory/Class.pm | 75 +++++++++ lib/MogileFS/Factory/Device.pm | 14 ++ lib/MogileFS/Factory/Domain.pm | 14 ++ lib/MogileFS/Factory/Host.pm | 14 ++ lib/MogileFS/NewClass.pm | 54 ++++++ lib/MogileFS/NewDevice.pm | 296 +++++++++++++++++++++++++++++++++ lib/MogileFS/NewDomain.pm | 41 +++++ lib/MogileFS/NewHost.pm | 104 ++++++++++++ lib/MogileFS/Store.pm | 48 ++++++ t/01-domain-class.t | 153 +++++++++++++++++ t/02-host-device.t | 119 +++++++++++++ 12 files changed, 1028 insertions(+) create mode 100644 lib/MogileFS/Factory.pm create mode 100644 lib/MogileFS/Factory/Class.pm create mode 100644 lib/MogileFS/Factory/Device.pm create mode 100644 lib/MogileFS/Factory/Domain.pm create mode 100644 lib/MogileFS/Factory/Host.pm create mode 100644 lib/MogileFS/NewClass.pm create mode 100644 lib/MogileFS/NewDevice.pm create mode 100644 lib/MogileFS/NewDomain.pm create mode 100644 lib/MogileFS/NewHost.pm create mode 100644 t/01-domain-class.t create mode 100644 t/02-host-device.t diff --git a/lib/MogileFS/Factory.pm b/lib/MogileFS/Factory.pm new file mode 100644 index 00000000..df896ae5 --- /dev/null +++ b/lib/MogileFS/Factory.pm @@ -0,0 +1,96 @@ +package MogileFS::Factory; +use strict; +use warnings; + +=head1 + +MogileFS::MogFactory - singleton class for holding some common objects. + +=head1 ABOUT + +This module holds a singleton for caching objects which are common but +relatively low in number. Such as devices, compared to fids. + +This singleton is to be maintained by the parent process, and inherited to +children during fork. Post-fork, the cache is updated by natural commands, or +a monitor process pushing changes through the parent. + +The purpose is to provide a fresh cache, without forcing new children to +wait for a monitoring run before becoming useful. It also should greatly +reduce the number of simple DB queries, as those should only happen +periodically directly from the monitor job. 
+ +=cut + +my %singleton; + +# Rename to new maybe? +sub get_factory { + my $class = shift; + if (!exists $singleton{$class}) { + $singleton{$class} = bless { + by_id => {}, + by_name => {}, + }, $class; + } + return $singleton{$class}; +} + +# because 'add' means bail if already exists. +sub set { + my $self = shift; + my $obj = shift; + + $self->{by_id}->{$obj->id} = $obj; + $self->{by_name}->{$obj->name} = $obj; + return $obj; +} + +sub remove { + my $self = shift; + my $obj = shift; + + if (exists $self->{by_id}->{$obj->id}) { + delete $self->{by_id}->{$obj->id}; + delete $self->{by_name}->{$obj->name}; + } +} + +sub get_by_id { + my ($self, $id) = @_; + return $self->{by_id}->{$id}; +} + +sub get_by_name { + my ($self, $name) = @_; + return $self->{by_name}->{$name}; +} + +sub get_ids { + my $self = shift; + return keys %{$self->{by_id}}; +} + +sub get_names { + my $self = shift; + return keys %{$self->{by_name}}; +} + +sub get_all { + my $self = shift; + return values %{$self->{by_id}}; +} + +sub map_by_id { + my $self = shift; + my $set = $self->{by_id}; + return { map { $_ => $set->{$_} } keys %{$set} }; +} + +sub map_by_name { + my $self = shift; + my $set = $self->{by_name}; + return { map { $_ => $set->{$_} } keys %{$set} }; +} + +1; diff --git a/lib/MogileFS/Factory/Class.pm b/lib/MogileFS/Factory/Class.pm new file mode 100644 index 00000000..a4ec3680 --- /dev/null +++ b/lib/MogileFS/Factory/Class.pm @@ -0,0 +1,75 @@ +package MogileFS::Factory::Class; +use strict; +use warnings; +use base 'MogileFS::Factory'; + +use MogileFS::NewClass; + +# This class is a reimplementation since classids and classnames +# are not globally unique... uses the same interface. +# Stupid/wasteful. +sub set { + my ($self, $domain, $args) = @_; + my $domain_factory = MogileFS::Factory::Domain->get_factory; + # FIXME: Inject the dmid into the class somehow. 
+ my $class = MogileFS::NewClass->new_from_args($args, $domain_factory); + $self->{by_id}->{$domain->id}->{$class->id} = $class; + $self->{by_name}->{$domain->id}->{$class->name} = $class; + return $class; +} + +# Example of what we could use for testing. +# Test creates the object, injects its own factory, then hands it to us. +sub set_from_obj { + my ($self, $obj) = @_; +} + +sub remove { + my $self = shift; + my $class = shift; + my $domid = $class->domain->id; + my $clsid = $class->id; + if (exists $self->{by_id}->{$domid}->{$clsid}) { + delete $self->{by_id}->{$domid}->{$clsid}; + delete $self->{by_name}->{$domid}->{$class->name}; + } +} + +sub get_by_id { + my ($self, $domain, $id) = @_; + return $self->{by_id}->{$domain->id}->{$id}; +} + +sub get_by_name { + my ($self, $domain, $name) = @_; + return $self->{by_name}->{$domain->id}->{$name}; +} + +sub get_ids { + my ($self, $domain) = @_; + return keys %{$self->{by_id}->{$domain->id}}; +} + +sub get_names { + my ($self, $domain) = @_; + return keys %{$self->{by_name}->{$domain->id}}; +} + +sub get_all { + my ($self, $domain) = @_; + return values %{$self->{by_id}->{$domain->id}}; +} + +sub map_by_id { + my ($self, $domain) = @_; + my $set = $self->{by_id}->{$domain->id}; + return { map { $_ => $set->{$_} } keys %{$set} }; +} + +sub map_by_name { + my ($self, $domain) = @_; + my $set = $self->{by_name}->{$domain->id}; + return { map { $_ => $set->{$_} } keys %{$set} }; +} + +1; diff --git a/lib/MogileFS/Factory/Device.pm b/lib/MogileFS/Factory/Device.pm new file mode 100644 index 00000000..91b3af9f --- /dev/null +++ b/lib/MogileFS/Factory/Device.pm @@ -0,0 +1,14 @@ +package MogileFS::Factory::Device; +use strict; +use warnings; +use base 'MogileFS::Factory'; + +use MogileFS::NewDevice; + +sub set { + my ($self, $args) = @_; + my $hostfactory = MogileFS::Factory::Host->get_factory; + return $self->SUPER::set(MogileFS::NewDevice->new_from_args($args, $hostfactory)); +} + +1; diff --git 
a/lib/MogileFS/Factory/Domain.pm b/lib/MogileFS/Factory/Domain.pm new file mode 100644 index 00000000..92fc60ef --- /dev/null +++ b/lib/MogileFS/Factory/Domain.pm @@ -0,0 +1,14 @@ +package MogileFS::Factory::Domain; +use strict; +use warnings; +use base 'MogileFS::Factory'; + +use MogileFS::NewDomain; + +sub set { + my ($self, $args) = @_; + my $classfactory = MogileFS::Factory::Class->get_factory; + return $self->SUPER::set(MogileFS::NewDomain->new_from_args($args, $classfactory)); +} + +1; diff --git a/lib/MogileFS/Factory/Host.pm b/lib/MogileFS/Factory/Host.pm new file mode 100644 index 00000000..08b6554c --- /dev/null +++ b/lib/MogileFS/Factory/Host.pm @@ -0,0 +1,14 @@ +package MogileFS::Factory::Host; +use strict; +use warnings; +use base 'MogileFS::Factory'; + +use MogileFS::NewHost; + +sub set { + my ($self, $args) = @_; + my $devfactory = MogileFS::Factory::Device->get_factory; + return $self->SUPER::set(MogileFS::NewHost->new_from_args($args, $devfactory)); +} + +1; diff --git a/lib/MogileFS/NewClass.pm b/lib/MogileFS/NewClass.pm new file mode 100644 index 00000000..5bcf050b --- /dev/null +++ b/lib/MogileFS/NewClass.pm @@ -0,0 +1,54 @@ +package MogileFS::NewClass; +use strict; +use warnings; +use MogileFS::Util qw(throw); + +=head1 + +MogileFS::NewClass - Class class. + +=cut + +sub new_from_args { + my ($class, $args, $domain_factory) = @_; + return bless { + domain_factory => $domain_factory, + mindevcount => 2, + %{$args}, + }, $class; +} + +# Instance methods: + +sub id { $_[0]{classid} } +sub name { $_[0]{classname} } +sub mindevcount { $_[0]{mindevcount} } + +sub repl_policy_string { + my $self = shift; + return $self->{replpolicy} ? $self->{replpolicy} + : 'MultipleHosts()'; +} + +sub repl_policy_obj { + my $self = shift; + if (! $self->{_repl_policy_obj}) { + my $polstr = $self->repl_policy_string; + # Parses the string. 
+ $self->{_repl_policy_obj} = + MogileFS::ReplicationPolicy->new_from_policy_string($polstr); + } + return $self->{_repl_policy_obj}; +} + +sub domain { + my $self = shift; + return $self->{domain_factory}->get_by_id($self->{dmid}); +} + +sub has_files { + my $self = shift; + return Mgd::get_store()->class_has_files($self->{dmid}, $self->id); +} + +1; diff --git a/lib/MogileFS/NewDevice.pm b/lib/MogileFS/NewDevice.pm new file mode 100644 index 00000000..bdae2ebf --- /dev/null +++ b/lib/MogileFS/NewDevice.pm @@ -0,0 +1,296 @@ +package MogileFS::NewDevice; +use strict; +use warnings; +use Carp qw/croak/; +use MogileFS::Util qw(throw); +use MogileFS::Util qw(okay_args device_state error); + +=head1 + +MogileFS::NewDevice - device class + +=cut + +BEGIN { + my $testing = $ENV{TESTING} ? 1 : 0; + eval "sub TESTING () { $testing }"; +} + +my @fields = qw/hostid status weight observed_state mb_total mb_used mb_asof +utilization devid/; + +sub new_from_args { + my ($class, $args, $host_factory) = @_; + my $self = bless { + host_factory => $host_factory, + %{$args}, + }, $class; + + $self->host || die "No host for $self->{devid} (host $self->{hostid})"; + + croak "invalid device observed state '$self->{observed_state}', valid: writeable, readable, unreachable" + if $self->{observed_state} !~ /^(?:writeable|readable|unreachable)$/; + + return $self; +} + +# Instance methods + +sub id { return $_[0]{devid} } +sub name { return $_[0]{devid} } +sub status { return $_[0]{status} } +sub weight { return $_[0]{weight} } +sub hostid { return $_[0]{hostid} } + +# FIXME: This shouldn't be necessary anymore? 
+sub t_init { + my ($self, $hostid, $state) = @_; + + my $dstate = device_state($state) or + die "Bogus state"; + + $self->{hostid} = $hostid; + $self->{status} = $state; + $self->{observed_state} = "writeable"; + + # say it's 10% full, of 1GB + $self->{mb_total} = 1000; + $self->{mb_used} = 100; +} + +sub host { + my $self = shift; + return $self->{host_factory}->get_by_id($self->{hostid}); +} + +# returns 0 if not known, else [0,1] +sub percent_free { + my $self = shift; + return 0 unless $self->{mb_total} && defined $self->{mb_used}; + return 1 - ($self->{mb_used} / $self->{mb_total}); +} + +# returns undef if not known, else [0,1] +sub percent_full { + my $self = shift; + return undef unless $self->{mb_total} && defined $self->{mb_used}; + return $self->{mb_used} / $self->{mb_total}; +} + +# FIXME: $self->mb_free? +sub fields { + my $self = shift; + my @tofetch = @_ ? @_ : @fields; + my $ret = { map { $_ => $self->{$_} } @tofetch }; + return $ret; +} + +sub observed_utilization { + my $self = shift; + + if (TESTING) { + my $weight_varname = 'T_FAKE_IO_DEV' . $self->id; + return $ENV{$weight_varname} if defined $ENV{$weight_varname}; + } + + return $self->{utilization}; +} + +sub observed_writeable { + my $self = shift; + return 0 unless $self->{observed_state} && $self->{observed_state} eq 'writeable'; + my $host = $self->host or return 0; + return 0 unless $host->observed_reachable; + return 1; +} + +sub observed_readable { + my $self = shift; + return $self->{observed_state} && $self->{observed_state} eq 'readable'; +} + +sub observed_unreachable { + my $self = shift; + return $self->{observed_state} && $self->{observed_state} eq 'unreachable'; +} + +# FIXME: This pattern is weird. Store the object on new? 
+sub dstate { + my $ds = device_state($_[0]->status); + return $ds if $ds; + error("dev$_[0]->{devid} has bogus status '$_[0]->{status}', pretending 'down'"); + return device_state("down"); +} + +sub can_delete_from { + return $_[0]->dstate->can_delete_from; +} + +sub can_read_from { + return $_[0]->dstate->can_read_from; +} + +# FIXME: Is there a (unrelated to this code) bug where new files aren't tested +# against the free space limit before being stored or replicated somewhere? +sub should_get_new_files { + my $self = shift; + my $dstate = $self->dstate; + + return 0 unless $dstate->should_get_new_files; + return 0 unless $self->observed_writeable; + return 0 unless $self->host->should_get_new_files; + # have enough disk space? (default: 100MB) + my $min_free = MogileFS->config("min_free_space"); + return 0 if $self->{mb_total} && + $self->mb_free < $min_free; + + return 1; +} + +sub mb_free { + my $self = shift; + return $self->{mb_total} - $self->{mb_used}; +} + +sub mb_used { + return $_[0]->{mb_used}; +} + +# currently the same policy, but leaving it open for differences later. +sub should_get_replicated_files { + return $_[0]->should_get_new_files; +} + +sub not_on_hosts { + my ($self, @hosts) = @_; + my @hostids = map { ref($_) ? $_->hostid : $_ } @hosts; + my $my_hostid = $self->hostid; + return (grep { $my_hostid == $_ } @hostids) ? 0 : 1; +} + +# "cached" by nature of the monitor worker testing this. +sub doesnt_know_mkcol { + return $_[0]->{no_mkcol}; +} + +# Gross class-based singleton cache. +my %dir_made; # /dev/path -> $time +my $dir_made_lastclean = 0; +# returns 1 on success, 0 on failure +sub create_directory { + my ($self, $uri) = @_; + return 1 if $self->doesnt_know_mkcol; + + # rfc2518 says we "should" use a trailing slash. Some servers + # (nginx) appears to require it. 
+ $uri .= '/' unless $uri =~ m/\/$/; + + return 1 if $dir_made{$uri}; + + my $hostid = $self->hostid; + my $host = $self->host; + my $hostip = $host->ip or return 0; + my $port = $host->http_port or return 0; + my $peer = "$hostip:$port"; + + my $sock = IO::Socket::INET->new(PeerAddr => $peer, Timeout => 1) + or return 0; + + print $sock "MKCOL $uri HTTP/1.0\r\n". + "Content-Length: 0\r\n\r\n"; + + my $ans = <$sock>; + + # if they don't support this method, remember that + if ($ans && $ans =~ m!HTTP/1\.[01] (400|501)!) { + $self->{no_mkcol} = 1; + # TODO: move this into method in *monitor* worker + return 1; + } + + return 0 unless $ans && $ans =~ m!^HTTP/1.[01] 2\d\d!; + + my $now = time(); + $dir_made{$uri} = $now; + + # cleanup %dir_made occasionally. + my $clean_interval = 300; # every 5 minutes. + if ($dir_made_lastclean < $now - $clean_interval) { + $dir_made_lastclean = $now; + foreach my $k (keys %dir_made) { + delete $dir_made{$k} if $dir_made{$k} < $now - 3600; + } + } + return 1; +} + +sub fid_list { + my ($self, %opts) = @_; + my $limit = delete $opts{limit}; + croak("No limit specified") unless $limit && $limit =~ /^\d+$/; + croak("Unknown options to fid_list") if %opts; + + my $sto = Mgd::get_store(); + my $fidids = $sto->get_fidids_by_device($self->devid, $limit); + return map { + MogileFS::FID->new($_) + } @{$fidids || []}; +} + +sub fid_chunks { + my ($self, %opts) = @_; + + my $sto = Mgd::get_store(); + # storage function does validation. 
+ my $fidids = $sto->get_fidid_chunks_by_device(devid => $self->devid, %opts); + return map { + MogileFS::FID->new($_) + } @{$fidids || []}; +} + +sub forget_about { + my ($self, $fid) = @_; + Mgd::get_store()->remove_fidid_from_devid($fid->id, $self->id); + return 1; +} + +sub usage_url { + my $self = shift; + my $host = $self->host; + my $get_port = $host->http_get_port; + my $hostip = $host->ip; + return "http://$hostip:$get_port/dev$self->{devid}/usage"; +} + +sub can_change_to_state { + my ($self, $newstate) = @_; + # don't allow dead -> alive transitions. (yes, still possible + # to go dead -> readonly -> alive to bypass this, but this is + # all more of a user-education thing than an absolute policy) + return 0 if $self->dstate->is_perm_dead && $newstate eq 'alive'; + return 1; +} + +sub vivify_directories { + my ($self, $path) = @_; + + # $path is something like: + # http://10.0.0.26:7500/dev2/0/000/148/0000148056.fid + + # three directories we'll want to make: + # http://10.0.0.26:7500/dev2/0 + # http://10.0.0.26:7500/dev2/0/000 + # http://10.0.0.26:7500/dev2/0/000/148 + + croak "non-HTTP mode no longer supported" unless $path =~ /^http/; + return 0 unless $path =~ m!/dev(\d+)/(\d+)/(\d\d\d)/(\d\d\d)/\d+\.fid$!; + my ($devid, $p1, $p2, $p3) = ($1, $2, $3, $4); + + die "devid mismatch" unless $self->id == $devid; + + $self->create_directory("/dev$devid/$p1"); + $self->create_directory("/dev$devid/$p1/$p2"); + $self->create_directory("/dev$devid/$p1/$p2/$p3"); +} + +1; diff --git a/lib/MogileFS/NewDomain.pm b/lib/MogileFS/NewDomain.pm new file mode 100644 index 00000000..20116756 --- /dev/null +++ b/lib/MogileFS/NewDomain.pm @@ -0,0 +1,41 @@ +package MogileFS::NewDomain; +use strict; +use warnings; +use MogileFS::Util qw(throw); + +=head1 + +MogileFS::NewDomain - domain class. 
+ +=cut + +sub new_from_args { + my ($class, $args, $class_factory) = @_; + return bless { + class_factory => $class_factory, + %{$args}, + }, $class; +} + +# Instance methods: + +sub id { $_[0]{dmid} } +sub name { $_[0]{namespace} } + +sub has_files { + my $self = shift; + return 1 if $Mgd::_T_DOM_HAS_FILES; + return Mgd::get_store()->domain_has_files($self->id); +} + +sub classes { + my $self = shift; + return $self->{class_factory}->get_all($self); +} + +sub class { + my $self = shift; + return $self->{class_factory}->get_by_name($self, $_[0]); +} + +1; diff --git a/lib/MogileFS/NewHost.pm b/lib/MogileFS/NewHost.pm new file mode 100644 index 00000000..a39646cb --- /dev/null +++ b/lib/MogileFS/NewHost.pm @@ -0,0 +1,104 @@ +package MogileFS::NewHost; +use strict; +use warnings; +use MogileFS::Util qw(throw); +use Net::Netmask; +use Carp qw(croak); +use MogileFS::Connection::Mogstored; + +=head1 + +MogileFS::NewHost - host class + +=cut + +# Centralized here instead of three places. +my @fields = qw/hostid hostname hostip status http_port http_get_port altip altmask/; + +# TODO: Validate a few things: state, observed state. +sub new_from_args { + my ($class, $args, $dev_factory) = @_; + my $self = bless { + dev_factory => $dev_factory, + %{$args}, + }, $class; + + $self->{mask} = ($self->{altip} && $self->{altmask}) ? 
+ Net::Netmask->new2($self->{altmask}) : undef; + + return $self; +} + +sub valid_state { + my ($class, $state) = @_; + return $state && $state =~ /^alive|dead|down$/; +} + +# Instance methods: + +sub id { $_[0]{hostid} } +sub name { $_[0]{hostname} } +sub hostname { $_[0]{hostname} } +sub hostip { $_[0]{hostip} } +sub status { $_[0]{status} } +sub http_port { $_[0]{http_port} } + +sub http_get_port { + return $_[0]->{http_get_port} || $_[0]->{http_port}; +} + +sub ip { + my $self = shift; + if ($self->{mask} && $self->{altip} && + ($MogileFS::REQ_altzone || ($MogileFS::REQ_client_ip && + $self->{mask}->match($MogileFS::REQ_client_ip)))) { + return $self->{altip}; + } else { + return $self->{hostip}; + } +} + +sub fields { + my $self = shift; + my @tofetch = @_ ? @_ : @fields; + return { map { $_ => $self->{$_} } @tofetch }; +} + +sub should_get_new_files { + return $_[0]->status eq 'alive'; +} + +sub t_init { + my $self = shift; + my $status = shift; + $self->{status} = $status; + $self->{observed_state} = "reachable"; +} + +sub observed_reachable { + my $self = shift; + return $self->{observed_state} && $self->{observed_state} eq 'reachable'; +} + +sub observed_unreachable { + my $self = shift; + return $self->{observed_state} && $self->{observed_state} eq 'unreachable'; +} + +# returns/creates a MogileFS::Connection::Mogstored object to the +# host's mogstored management/side-channel port (which starts +# unconnected, and only connects when you ask it to, with its sock +# method) +sub mogstored_conn { + my $self = shift; + return $self->{mogstored_conn} ||= + MogileFS::Connection::Mogstored->new($self->ip, $self->sidechannel_port); +} + +sub sidechannel_port { + # TODO: let this be configurable per-host? currently it's configured + # once for all machines. 
+ MogileFS->config("mogstored_stream_port"); +} + +1; diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index a1d18836..32838414 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -341,6 +341,7 @@ sub conddup { my ($self, $code) = @_; my $rv = eval { $code->(); }; throw("dup") if $self->was_duplicate_error; + croak($@) if $@; return $rv; } @@ -735,6 +736,8 @@ sub delete_host { # return true if deleted, 0 if didn't exist, exception if error sub delete_domain { my ($self, $dmid) = @_; + throw("has_files") if $self->domain_has_files($dmid); + throw("has_classes") if $self->domain_has_classes($dmid); return $self->dbh->do("DELETE FROM domain WHERE dmid = ?", undef, $dmid); } @@ -745,6 +748,13 @@ sub domain_has_files { return $has_a_fid ? 1 : 0; } +sub domain_has_classes { + my ($self, $dmid) = @_; + my $has_a_class = $self->dbh->selectrow_array('SELECT classid FROM class WHERE dmid = ? LIMIT 1', + undef, $dmid); + return $has_a_class ? 1 : 0; +} + sub class_has_files { my ($self, $dmid, $clid) = @_; my $has_a_fid = $self->dbh->selectrow_array('SELECT fid FROM file WHERE dmid = ? AND classid = ? LIMIT 1', @@ -1042,6 +1052,18 @@ sub create_device { return 1; } +sub update_device { + my ($self, $devid, $to_update) = @_; + my @keys = sort keys %$to_update; + return unless @keys; + $self->conddup(sub { + $self->dbh->do("UPDATE device SET " . join('=?, ', @keys) + . "=? 
WHERE devid=?", undef, (map { $to_update->{$_} } @keys), + $devid); + }); + return 1; +} + sub update_device_usage { my $self = shift; my %arg = $self->_valid_params([qw(mb_total mb_used devid)], @_); @@ -1052,6 +1074,19 @@ sub update_device_usage { $self->condthrow; } +# This is unimplemented at the moment as we must verify: +# - no file_on rows exist +# - nothing in file_to_queue is going to attempt to use it +# - nothing in file_to_replicate is going to attempt to use it +# - it's already been marked dead +# - that all trackers are likely to know this :/ +# - ensure the devid can't be reused +# IE; the user can't mark it dead then remove it all at once and cause their +# cluster to implode. +sub delete_device { + die "Unimplemented; needs further testing"; +} + sub mark_fidid_unreachable { my ($self, $fidid) = @_; die "Your database does not support REPLACE! Reimplement mark_fidid_unreachable!" unless $self->can_replace; @@ -1077,6 +1112,7 @@ sub set_device_state { sub delete_class { my ($self, $dmid, $cid) = @_; + throw("has_files") if $self->class_has_files($dmid, $cid); eval { $self->dbh->do("DELETE FROM class WHERE dmid = ? AND classid = ?", undef, $dmid, $cid); }; @@ -1441,6 +1477,18 @@ sub create_domain { die "failed to make domain"; # FIXME: the above is racy. } +sub update_host { + my ($self, $hid, $to_update) = @_; + my @keys = sort keys %$to_update; + return unless @keys; + $self->conddup(sub { + $self->dbh->do("UPDATE host SET " . join('=?, ', @keys) + . "=? 
WHERE hostid=?", undef, (map { $to_update->{$_} } @keys), + $hid); + }); + return 1; +} + sub update_host_property { my ($self, $hostid, $col, $val) = @_; $self->conddup(sub { diff --git a/t/01-domain-class.t b/t/01-domain-class.t new file mode 100644 index 00000000..69336da6 --- /dev/null +++ b/t/01-domain-class.t @@ -0,0 +1,153 @@ +# -*-perl-*- + +use strict; +use warnings; +use Test::More; +use FindBin qw($Bin); + +use MogileFS::Server; +use MogileFS::Util qw(error_code); +use MogileFS::Test; +use MogileFS::Factory; +use MogileFS::Factory::Domain; +use MogileFS::Factory::Class; +use MogileFS::NewDomain; +use MogileFS::NewClass; + +use Data::Dumper qw/Dumper/; + +my $sto = eval { temp_store(); }; +if ($sto) { + plan tests => 33; +} else { + plan skip_all => "Can't create temporary test database: $@"; + exit 0; +} + +# Fetch the factories. +my $domfac = MogileFS::Factory::Domain->get_factory; +ok($domfac, "got a domain factory"); +my $classfac = MogileFS::Factory::Class->get_factory; +ok($classfac, "got a class factory"); + +# Ensure the inherited singleton is good. +ok($domfac != $classfac, "factories are not the same singleton"); + +{ + # Add in a test domain. + my $dom = $domfac->set({ dmid => 1, namespace => 'toast'}); + ok($dom, "made a new domain object"); + is($dom->id, 1, "domain id is 1"); + is($dom->name, 'toast', 'domain namespace is toast'); + + # Add in a test class. + my $cls = $classfac->set($dom, { classid => 1, dmid => 1, mindevcount => 3, + replpolicy => '', classname => 'fried'}); + ok($cls, "got a class object"); + is($cls->id, 1, "class id is 1"); + is($cls->name, 'fried', 'class name is fried'); + is(ref($cls->domain), 'MogileFS::NewDomain', + 'class can find a domain object'); +} + +# Add a few more classes and domains. 
+{ + my $dom2 = $domfac->set({ dmid => 2, namespace => 'harro' }); + $classfac->set($dom2, { classid => 1, dmid => 2, mindevcount => 2, + replpolicy => '', classname => 'red' }); + $classfac->set($dom2, { classid => 2, dmid => 2, mindevcount => 3, + replpolicy => 'MultipleHosts(2)', classname => 'green' }); + $classfac->set($dom2, { classid => 3, dmid => 2, mindevcount => 4, + replpolicy => 'MultipleHosts(5)', classname => 'blue' }); +} + +# Ensure the select and remove factory methods work. +{ + my $dom = $domfac->get_by_id(1); + is($dom->name, 'toast', 'got the right domain from get_by_id'); +} + +{ + my $dom = $domfac->get_by_name('harro'); + is($dom->id, 2, 'got the right domain from get_by_name'); +} + +{ + my @doms = $domfac->get_all; + is(scalar(@doms), 2, 'got two domains back from get_all'); + for (@doms) { + is(ref($_), 'MogileFS::NewDomain', 'and both are domains'); + } + isnt($doms[0]->id, $doms[1]->id, 'and both are not the same'); +} + +{ + my $dom = $domfac->get_by_name('harro'); + my $clsmap = $classfac->map_by_id($dom); + is(ref($clsmap), 'HASH', 'got a mapped class hash'); + is($clsmap->{2}->name, 'green', 'got the right class set'); + + $classfac->remove($clsmap->{2}); + + my $cls = $classfac->get_by_name($dom, 'green'); + ok(!$cls, "class removed from factory"); +} + +# Test the domain routines harder. +{ + my $dom = $domfac->get_by_name('harro'); + my @classes = $dom->classes; + is(scalar(@classes), 2, 'found two classes'); + + ok($dom->class('blue'), 'found the blue class'); + ok(!$dom->class('fried'), 'did not find the fried class'); +} + +# Test the class routines harder. +{ + my $dom = $domfac->get_by_name('harro'); + my $cls = $dom->class('blue'); + my $polobj = $cls->repl_policy_obj; + ok($polobj, 'class can create policy object'); +} + +# Add a domain and two classes to the DB. +{ + my $domid = $sto->create_domain('foo'); + ok($domid, 'new domain stored in database: ' . 
$domid); + + my $clsid1 = $sto->create_class($domid, 'bar'); + my $clsid2 = $sto->create_class($domid, 'baz'); + is($clsid1, 1, 'new class1 stored in database'); + is($clsid2, 2, 'new class2 stored in database'); + + ok($sto->update_class_mindevcount(dmid => $domid, classid => $clsid2, + mindevcount => 3), 'can set mindevcount'); + ok($sto->update_class_replpolicy(dmid => $domid, classid => $clsid2, + replpolicy => 'MultipleHosts(6)'), 'can set replpolicy'); + ok($sto->update_class_name(dmid => $domid, classid => $clsid2, + classname => 'boo'), 'can rename class'); +} + +{ + # Reload from the DB and confirm they came back the way they went in. + my %domains = $sto->get_all_domains; + ok(exists $domains{foo}, 'domain foo exists'); + is($domains{foo}, 1, 'and the id is 1'); + my @classes = $sto->get_all_classes; + is_deeply($classes[0], { + 'replpolicy' => undef, + 'dmid' => '1', + 'classid' => '1', + 'mindevcount' => '2', + 'classname' => 'bar' + }, 'class bar came back'); + # We edited class2 a bunch, make sure that all stuck. + is_deeply($classes[1], { + 'replpolicy' => 'MultipleHosts(6)', + 'dmid' => '1', + 'classid' => '2', + 'mindevcount' => '3', + 'classname' => 'boo' + }, 'class baz came back as boo'); +} diff --git a/t/02-host-device.t b/t/02-host-device.t new file mode 100644 index 00000000..bc2f1577 --- /dev/null +++ b/t/02-host-device.t @@ -0,0 +1,119 @@ +# -*-perl-*- + +use strict; +use warnings; +use Test::More; +use FindBin qw($Bin); + +use MogileFS::Server; +use MogileFS::Util qw(error_code); +use MogileFS::Test; +use MogileFS::Factory; +use MogileFS::Factory::Host; +use MogileFS::Factory::Device; +use MogileFS::NewHost; +use MogileFS::NewDevice; + +use Data::Dumper qw/Dumper/; + +my $sto = eval { temp_store(); }; +if ($sto) { + plan tests => 21; +} else { + plan skip_all => "Can't create temporary test database: $@"; + exit 0; +} + +# Fetch the factories. 
+my $hostfac = MogileFS::Factory::Host->get_factory; +ok($hostfac, "got a host factory"); +my $devfac = MogileFS::Factory::Device->get_factory; +ok($devfac, "got a device factory"); + +MogileFS::Config->set_config_no_broadcast("min_free_space", 100); + +# Ensure the inherited singleton is good. +ok($hostfac != $devfac, "factories are not the same singleton"); + +{ + # Test host. + my $host = $hostfac->set({ hostid => 1, hostname => 'foo', hostip => +'127.0.0.5', status => 'alive', http_port => 7500, observed_state => +'reachable'}); + ok($host, 'made a new host object'); + is($host->id, 1, 'host id is 1'); + is($host->name, 'foo', 'host name is foo'); + + # Test device. + my $dev = $devfac->set({ devid => 1, hostid => 1, status => 'alive', +weight => 100, mb_total => 5000, mb_used => 300, mb_asof => 1295217165, +observed_state => 'writeable'}); + ok($dev, 'made a new dev object'); + is($dev->id, 1, 'dev id is 1'); + is($dev->host->name, 'foo', 'name of devs host is foo'); + ok($dev->can_delete_from, 'can_delete_from works'); + ok($dev->can_read_from, 'can_read_from works'); + ok($dev->should_get_new_files, 'should_get_new_files works'); + + $hostfac->remove($host); + $devfac->remove($dev); +} + +# Might be able to skip the factory tests, as domain/class cover those. + +{ + # Add a host and two devices to the DB. + my $hostid = $sto->create_host('foo', '127.0.0.7'); + is($hostid, 1, 'new host got id 1'); + + # returns 1 instead of the devid :( + # since this it the only place which doesn't autogenerate its id. + ok($sto->create_device(1, $hostid, 'alive'), 'created dev1'); + ok($sto->create_device(2, $hostid, 'down'), 'created dev2'); + + # Update host details to DB and ensure they stick. + ok($sto->update_host($hostid, { http_port => 6500, http_get_port => 6501 }), + 'updated host DB entry'); + # Update device details in DB and ensure they stick. 
+ ok($sto->update_device(1, { mb_total => 150, mb_used => 8 }), + 'updated dev1 DB entry'); + ok($sto->update_device(2, { mb_total => 100, mb_used => 3, + status => 'dead' }), 'updated dev2 DB entry'); +} + +{ + # Reload from DB and confirm they match what we had before. + my @hosts = $sto->get_all_hosts; + my @devs = $sto->get_all_devices; + + is_deeply($hosts[0], { + 'http_get_port' => 6501, + 'status' => 'down', + 'http_port' => '6500', + 'hostip' => '127.0.0.7', + 'hostname' => 'foo', + 'hostid' => '1', + 'altip' => undef, + 'altmask' => undef + }, 'host is as expected'); + + is_deeply($devs[0], { + 'mb_total' => 150, + 'mb_used' => 8, + 'status' => 'alive', + 'devid' => '1', + 'weight' => '100', + 'mb_asof' => undef, + 'hostid' => '1' + }, 'dev1 is as expected'); + is_deeply($devs[1], { + 'mb_total' => 100, + 'mb_used' => 3, + 'status' => 'dead', + 'devid' => '2', + 'weight' => '100', + 'mb_asof' => undef, + 'hostid' => '1' + }, 'dev2 is as expected'); +} + From 2ee6113e36b15d91fd11d5f92a135bb5b225b229 Mon Sep 17 00:00:00 2001 From: dormando Date: Tue, 24 May 2011 18:14:20 -0700 Subject: [PATCH 055/405] Giant awful patch to enable new object monitoring Both the old code and new code runs in parallel. The patch is mostly awful interspersed with areas of elegance. 
--- lib/MogileFS/Factory.pm | 1 - lib/MogileFS/Factory/Class.pm | 58 ++++-- lib/MogileFS/NewClass.pm | 1 + lib/MogileFS/NewDevice.pm | 14 +- lib/MogileFS/NewHost.pm | 8 +- lib/MogileFS/ProcManager.pm | 11 +- lib/MogileFS/Server.pm | 9 + lib/MogileFS/Util.pm | 41 +++++ lib/MogileFS/Worker.pm | 7 +- lib/MogileFS/Worker/JobMaster.pm | 16 +- lib/MogileFS/Worker/Monitor.pm | 292 ++++++++++++++++++++++++++++++- t/01-domain-class.t | 8 +- 12 files changed, 423 insertions(+), 43 deletions(-) diff --git a/lib/MogileFS/Factory.pm b/lib/MogileFS/Factory.pm index df896ae5..089ee4e1 100644 --- a/lib/MogileFS/Factory.pm +++ b/lib/MogileFS/Factory.pm @@ -40,7 +40,6 @@ sub get_factory { sub set { my $self = shift; my $obj = shift; - $self->{by_id}->{$obj->id} = $obj; $self->{by_name}->{$obj->name} = $obj; return $obj; diff --git a/lib/MogileFS/Factory/Class.pm b/lib/MogileFS/Factory/Class.pm index a4ec3680..5044fcfd 100644 --- a/lib/MogileFS/Factory/Class.pm +++ b/lib/MogileFS/Factory/Class.pm @@ -9,15 +9,28 @@ use MogileFS::NewClass; # are not globally unique... uses the same interface. # Stupid/wasteful. sub set { - my ($self, $domain, $args) = @_; + my ($self, $args) = @_; my $domain_factory = MogileFS::Factory::Domain->get_factory; - # FIXME: Inject the dmid into the class somehow. + my $class = MogileFS::NewClass->new_from_args($args, $domain_factory); - $self->{by_id}->{$domain->id}->{$class->id} = $class; - $self->{by_name}->{$domain->id}->{$class->name} = $class; + my $dmid = $class->dmid; + $self->{by_id}->{$dmid}->{$class->id} = $class; + $self->{by_name}->{$dmid}->{$class->name} = $class; return $class; } +# Class factory is very awkward. Lets be very flexible in what we take; a +# domain object + id, a dmid, or a string with dmid-classid. +sub _find_ids { + my $self = shift; + my $dom = shift; + my $dmid = ref $dom ? $dom->id : $dom; + if ($dmid =~ m/^(\d+)-(\d+)$/) { + return $1, $2; + } + return $dmid, @_; +} + # Example of what we could use for testing. 
# Test creates the object, injects its own factory, then hands it to us. sub set_from_obj { @@ -27,7 +40,7 @@ sub set_from_obj { sub remove { my $self = shift; my $class = shift; - my $domid = $class->domain->id; + my $domid = $class->dmid; my $clsid = $class->id; if (exists $self->{by_id}->{$domid}->{$clsid}) { delete $self->{by_id}->{$domid}->{$clsid}; @@ -36,39 +49,46 @@ sub remove { } sub get_by_id { - my ($self, $domain, $id) = @_; - return $self->{by_id}->{$domain->id}->{$id}; + my $self = shift; + my ($dmid, $id) = $self->_find_ids(@_); + return $self->{by_id}->{$dmid}->{$id}; } sub get_by_name { - my ($self, $domain, $name) = @_; - return $self->{by_name}->{$domain->id}->{$name}; + my $self = shift; + my ($dmid, $name) = $self->_find_ids(@_); + return $self->{by_name}->{$dmid}->{$name}; } sub get_ids { - my ($self, $domain) = @_; - return keys %{$self->{by_id}->{$domain->id}}; + my $self = shift; + my ($dmid) = $self->_find_ids(@_); + return keys %{$self->{by_id}->{$dmid}}; } sub get_names { - my ($self, $domain) = @_; - return keys %{$self->{by_name}->{$domain->id}}; + my $self = shift; + my ($dmid) = $self->_find_ids(@_); + return keys %{$self->{by_name}->{$dmid}}; } sub get_all { - my ($self, $domain) = @_; - return values %{$self->{by_id}->{$domain->id}}; + my $self = shift; + my ($dmid) = $self->_find_ids(@_); + return values %{$self->{by_id}->{$dmid}}; } sub map_by_id { - my ($self, $domain) = @_; - my $set = $self->{by_id}->{$domain->id}; + my $self = shift; + my ($dmid) = $self->_find_ids(@_); + my $set = $self->{by_id}->{$dmid}; return { map { $_ => $set->{$_} } keys %{$set} }; } sub map_by_name { - my ($self, $domain) = @_; - my $set = $self->{by_name}->{$domain->id}; + my $self = shift; + my ($dmid) = $self->_find_ids(@_); + my $set = $self->{by_name}->{$dmid}; return { map { $_ => $set->{$_} } keys %{$set} }; } diff --git a/lib/MogileFS/NewClass.pm b/lib/MogileFS/NewClass.pm index 5bcf050b..059c78aa 100644 --- a/lib/MogileFS/NewClass.pm +++ 
b/lib/MogileFS/NewClass.pm @@ -23,6 +23,7 @@ sub new_from_args { sub id { $_[0]{classid} } sub name { $_[0]{classname} } sub mindevcount { $_[0]{mindevcount} } +sub dmid { $_[0]{dmid} } sub repl_policy_string { my $self = shift; diff --git a/lib/MogileFS/NewDevice.pm b/lib/MogileFS/NewDevice.pm index bdae2ebf..ce88a370 100644 --- a/lib/MogileFS/NewDevice.pm +++ b/lib/MogileFS/NewDevice.pm @@ -16,8 +16,9 @@ BEGIN { eval "sub TESTING () { $testing }"; } -my @fields = qw/hostid status weight observed_state mb_total mb_used mb_asof -utilization devid/; +my @observed_fields = qw/observed_state utilization/; +my @fields = (qw/hostid status weight mb_total mb_used mb_asof devid/, + @observed_fields); sub new_from_args { my ($class, $args, $host_factory) = @_; @@ -26,10 +27,11 @@ sub new_from_args { %{$args}, }, $class; - $self->host || die "No host for $self->{devid} (host $self->{hostid})"; + # FIXME: No guarantee (as of now?) that hosts get loaded before devs. + #$self->host || die "No host for $self->{devid} (host $self->{hostid})"; croak "invalid device observed state '$self->{observed_state}', valid: writeable, readable, unreachable" - if $self->{observed_state} !~ /^(?:writeable|readable|unreachable)$/; + if $self->{observed_state} && $self->{observed_state} !~ /^(?:writeable|readable|unreachable)$/; return $self; } @@ -85,6 +87,10 @@ sub fields { return $ret; } +sub observed_fields { + return $_[0]->fields(@observed_fields); +} + sub observed_utilization { my $self = shift; diff --git a/lib/MogileFS/NewHost.pm b/lib/MogileFS/NewHost.pm index a39646cb..71970c27 100644 --- a/lib/MogileFS/NewHost.pm +++ b/lib/MogileFS/NewHost.pm @@ -13,7 +13,9 @@ MogileFS::NewHost - host class =cut # Centralized here instead of three places. 
-my @fields = qw/hostid hostname hostip status http_port http_get_port altip altmask/; +my @observed_fields = qw/observed_state/; +my @fields = (qw/hostid hostname hostip status http_port http_get_port altip altmask/, + @observed_fields); # TODO: Validate a few things: state, observed state. sub new_from_args { @@ -64,6 +66,10 @@ sub fields { return { map { $_ => $self->{$_} } @tofetch }; } +sub observed_fields { + return $_[0]->fields(@observed_fields); +} + sub should_get_new_files { return $_[0]->status eq 'alive'; } diff --git a/lib/MogileFS/ProcManager.pm b/lib/MogileFS/ProcManager.pm index 1e8d7a50..224e1a02 100644 --- a/lib/MogileFS/ProcManager.pm +++ b/lib/MogileFS/ProcManager.pm @@ -6,6 +6,7 @@ use Symbol; use Socket; use MogileFS::Connection::Client; use MogileFS::Connection::Worker; +use MogileFS::Util qw(apply_state_events); # This class handles keeping lists of workers and clients and # assigning them to each other when things happen. You don't actually @@ -682,6 +683,14 @@ sub HandleChildRequest { } elsif ($cmd eq ":still_alive") { # a no-op + } elsif ($cmd =~ /^:monitor_events/) { + # Apply the state locally, so when we fork children they have a + # pre-parsed factory. + # Also replay the event back where it came, so the same mechanism + # applies and uses local changes. + apply_state_events(\$cmd); + MogileFS::ProcManager->send_to_all_children($cmd); + } elsif ($cmd eq ":monitor_just_ran") { send_monitor_has_run($child); @@ -819,7 +828,7 @@ sub send_to_all_children { my ($pkg, $msg, $exclude) = @_; foreach my $child (values %child) { next if $exclude && $child == $exclude; - $child->write("$msg\r\n"); + $child->write($msg . 
"\r\n"); } } diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 60b43403..2bb9de97 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -64,6 +64,15 @@ use MogileFS::Worker::Monitor; use MogileFS::Worker::Fsck; use MogileFS::Worker::JobMaster; +use MogileFS::Factory::Domain; +use MogileFS::Factory::Class; +use MogileFS::Factory::Host; +use MogileFS::Factory::Device; +use MogileFS::NewDomain; +use MogileFS::NewClass; +use MogileFS::NewHost; +use MogileFS::NewDevice; + use MogileFS::HTTPFile; use MogileFS::Class; use MogileFS::Device; diff --git a/lib/MogileFS/Util.pm b/lib/MogileFS/Util.pm index d6d6a48d..e908e92a 100644 --- a/lib/MogileFS/Util.pm +++ b/lib/MogileFS/Util.pm @@ -11,8 +11,44 @@ our @EXPORT_OK = qw( error undeferr debug fatal daemonize weighted_list every wait_for_readability wait_for_writeability throw error_code max min first okay_args device_state eurl decode_url_args + encode_url_args apply_state_events ); +# Applies monitor-job-supplied state events against the factory singletons. +# Sad this couldn't be an object method, but ProcManager doesn't base off +# anything common. +sub apply_state_events { + my @events = split(/\s/, ${$_[0]}); + shift @events; # pop the :monitor_events part + + # This will needlessly fetch domain/class/host most of the time. + # Maybe replace with something that "caches" factories? + my %factories = ( 'domain' => MogileFS::Factory::Domain->get_factory, + 'class' => MogileFS::Factory::Class->get_factory, + 'host' => MogileFS::Factory::Host->get_factory, + 'device' => MogileFS::Factory::Device->get_factory, ); + + for my $ev (@events) { + my $args = decode_url_args($ev); + my $mode = delete $args->{ev_mode}; + my $type = delete $args->{ev_type}; + my $id = delete $args->{ev_id}; + + my $old = $factories{$type}->get_by_id($id); + if ($mode eq 'setstate') { + # Host/Device only. + # FIXME: Make objects slightly mutable and directly set fields? 
+ $factories{$type}->set({ %{$old->fields}, %$args }); + } elsif ($mode eq 'set') { + # Re-add any observed data. + my $observed = $old ? $old->observed_fields : {}; + $factories{$type}->set({ %$args, %$observed }); + } elsif ($mode eq 'remove') { + $factories{$type}->remove($old) if $old; + } + } +} + sub every { my ($delay, $code) = @_; my ($worker, $psock_fd); @@ -255,6 +291,11 @@ sub eurl { return $a; } +sub encode_url_args { + my $args = shift; + return join('&', map { eurl($_) . "=" . eurl($args->{$_}) } keys %$args); +} + sub decode_url_args { my $a = shift; my $buffer = ref $a ? $a : \$a; diff --git a/lib/MogileFS/Worker.pm b/lib/MogileFS/Worker.pm index d432a3a3..75a15345 100644 --- a/lib/MogileFS/Worker.pm +++ b/lib/MogileFS/Worker.pm @@ -11,7 +11,7 @@ use fields ('psock', # socket for parent/child communications 'queue_todo', # aref of hrefs of work sent from parent ); -use MogileFS::Util qw(error eurl decode_url_args); +use MogileFS::Util qw(error eurl decode_url_args apply_state_events); use MogileFS::Server; use vars ( @@ -263,6 +263,11 @@ sub process_generic_command { return 1; } + if ($$lineref =~ /^:monitor_events/) { + apply_state_events($lineref); + return 1; + } + if ($$lineref =~ /^:monitor_has_run/) { $self->{monitor_has_run} = 1; return 1; diff --git a/lib/MogileFS/Worker/JobMaster.pm b/lib/MogileFS/Worker/JobMaster.pm index 3d3663d3..b9c2f665 100644 --- a/lib/MogileFS/Worker/JobMaster.pm +++ b/lib/MogileFS/Worker/JobMaster.pm @@ -12,7 +12,7 @@ use fields ( 'dele_queue_limit', 'rebl_queue_limit', ); -use MogileFS::Util qw(every error debug eurl); +use MogileFS::Util qw(every error debug encode_url_args); use MogileFS::Config; use constant DEF_FSCK_QUEUE_MAX => 20_000; @@ -72,7 +72,7 @@ sub _check_delete_queues { return unless @to_del; for my $todo (@to_del) { $self->send_to_parent("queue_todo delete " . 
- _eurl_encode_args($todo)); + encode_url_args($todo)); } return 1; } @@ -101,7 +101,7 @@ sub _check_replicate_queues { for my $todo (@to_repl) { $todo->{_type} = 'replicate'; # could be 'drain', etc. $self->send_to_parent("queue_todo replicate " . - _eurl_encode_args($todo)); + encode_url_args($todo)); } return 1; } @@ -130,7 +130,7 @@ sub _check_fsck_queues { $self->{fsck_queue_limit} = @to_fsck ? $new_limit : 100; return unless @to_fsck; for my $todo (@to_fsck) { - $self->send_to_parent("queue_todo fsck " . _eurl_encode_args($todo)); + $self->send_to_parent("queue_todo fsck " . encode_url_args($todo)); } return 1; } @@ -183,7 +183,7 @@ sub _check_rebal_queues { return unless @to_rebal; for my $todo (@to_rebal) { $todo->{_type} = 'rebalance'; - $self->send_to_parent("queue_todo rebalance " . _eurl_encode_args($todo)); + $self->send_to_parent("queue_todo rebalance " . encode_url_args($todo)); } return 1; } @@ -274,12 +274,6 @@ sub queue_depth_check { return (0, $limit); } -# TODO: Move this into Util.pm? -sub _eurl_encode_args { - my $args = shift; - return join('&', map { eurl($_) . "=" . 
eurl($args->{$_}) } keys %$args); -} - 1; # Local Variables: diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index 073a1c81..ba7a68bd 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -10,11 +10,14 @@ use fields ( 'seen_hosts', # IP -> 1 (reset every loop) 'ua', # LWP::UserAgent for checking usage files 'iow', # MogileFS::IOStatWatcher object + 'prev_data', # DB data from previous run + 'devutil', # Running tally of device utilization + 'events', # Queue of state events ); use Danga::Socket 1.56; use MogileFS::Config; -use MogileFS::Util qw(error debug); +use MogileFS::Util qw(error debug encode_url_args); use MogileFS::IOStatWatcher; use constant UPDATE_DB_EVERY => 15; @@ -27,6 +30,10 @@ sub new { $self->{last_db_update} = {}; $self->{last_test_write} = {}; $self->{iow} = MogileFS::IOStatWatcher->new; + $self->{prev_data} = { domain => {}, class => {}, host => {}, + device => {} }; + $self->{devutil} = { cur => {}, prev => {} }; + $self->{events} = []; return $self; } @@ -49,6 +56,7 @@ sub work { # Lets not propagate devices that we accidentally find. # This does hit the DB every time a device does not exist, so # perhaps should add negative caching in the future. + $self->{devutil}->{cur}->{$devid} = $util; my $dev = MogileFS::Device->of_devid($devid); next unless $dev->exists; $dev->set_observed_utilization($util); @@ -85,11 +93,176 @@ sub work { }; $main_monitor->(); + + my $db_monitor; + $db_monitor = sub { + $self->parent_ping; + print STDERR "New monitor for db data running\n"; + $self->validate_dbh; + + my $new_data = {}; + my $prev_data = $self->{prev_data}; + my $db_data = $self->grab_all_data; + + # Stack this up to ship back later. 
+ my @events = (); + $self->diff_data($db_data, $prev_data, $new_data, \@events); + + $self->{prev_data} = $new_data; + $self->send_events_to_parent; + Danga::Socket->AddTimer(10, $db_monitor); + print STDERR "New monitor for db finished\n"; + }; + + $db_monitor->(); + # FIXME: Add a "read_from_parent" to ensure we pick up the response for + # populating the factories? + #$self->read_from_parent; + + my $new_monitor; + $new_monitor = sub { + $self->parent_ping; + print STDERR "New monitor running\n"; + $self->validate_dbh; + + my $dev_factory = MogileFS::Factory::Device->get_factory(); + + my $cur_iow = {}; + my @events = (); + # Run check_devices2 to test host/devs. diff against old values. + for my $dev ($dev_factory->get_all) { + if (my $state = $self->is_iow_diff($dev)) { + $self->state_event('device', $dev->id, {utilization => $state}); + } + $cur_iow->{$dev->id} = $self->{devutil}->{cur}->{$dev->id}; + $self->check_device2($dev, \@events); + } + + $self->{devutil}->{prev} = $cur_iow; + # Set the IOWatcher hosts (once old monitor code has been disabled) + + $self->send_events_to_parent; + Danga::Socket->AddTimer(2.5, $new_monitor); + print STDERR "New monitor finished\n"; + }; + + $new_monitor->(); Danga::Socket->EventLoop; } # -------------------------------------------------------------------------- +# Flattens and flips events up to the parent. Can be huge on startup! 
+# Events: set type foo=bar&baz=quux +# remove type id +# setstate type id foo=bar&baz=quux +# Combined: ev_mode=set&ev_type=device&foo=bar +# ev_mode=setstate&ev_type=device&ev_id=1&foo=bar +sub send_events_to_parent { + my $self = shift; + my @flat = (); + for my $ev (@{$self->{events}}) { + my ($mode, $type, $args) = @$ev; + $args->{ev_mode} = $mode; + $args->{ev_type} = $type; + push(@flat, encode_url_args($args)); + } + return unless @flat; + $self->{events} = []; + print STDERR "SENDING STATE CHANGES ", join(' ', ':monitor_events', @flat), "\n"; + $self->send_to_parent(join(' ', ':monitor_events', @flat)); +} + +sub add_event { + push(@{$_[0]->{events}}, $_[1]); +} + +sub set_event { + # Allow callers to use shorthand + $_[3]->{ev_id} = $_[2]; + $_[0]->add_event(['set', $_[1], $_[3]]); +} +sub remove_event { $_[0]->add_event(['remove', $_[1], { ev_id => $_[2] }]); } +sub state_event { + $_[3]->{ev_id} = $_[2]; + $_[0]->add_event(['setstate', $_[1], $_[3]]); +} + +sub is_iow_diff { + my ($self, $dev) = @_; + my $devid = $dev->id; + my $p = $self->{devutil}->{prev}->{$devid}; + my $c = $self->{devutil}->{cur}->{$devid}; + if ( ! defined $p || $p ne $c ) { + return $c; + } + return undef; +} + +sub diff_data { + my ($self, $db_data, $prev_data, $new_data, $ev) = @_; + + for my $type (keys %{$db_data}) { + my $d_data = $db_data->{$type}; + my $p_data = $prev_data->{$type}; + my $n_data = {}; + + for my $item (@{$d_data}) { + my $id = $type eq 'domain' ? $item->{dmid} + : $type eq 'class' ? $item->{dmid} . '-' . $item->{classid} + : $type eq 'host' ? $item->{hostid} + : $type eq 'device' ? $item->{devid} : die "Unknown type"; + my $old = delete $p_data->{$id}; + # Special case: for devices, we don't care if mb_asof changes. + # FIXME: Change the grab routine (or filter there?). 
+ delete $item->{mb_asof} if $type eq 'device'; + if (!$old || $self->diff_hash($old, $item)) { + $self->set_event($type, $id, { %$item }); + } + $n_data->{$id} = $item; + } + for my $id (keys %{$p_data}) { + $self->remove_event($type, $id); + } + + $new_data->{$type} = $n_data; + } +} + +# returns 1 if the hashes are different. +sub diff_hash { + my ($self, $old, $new) = @_; + + my %keys = (); + map { $keys{$_}++ } keys %$old, keys %$new; + for my $k (keys %keys) { + return 1 unless ((exists $old->{$k} && + exists $new->{$k}) && + ( (! defined $old->{$k} && ! defined $new->{$k}) || + ($old->{$k} eq $new->{$k}) ) + ); + } + return 0; +} + +sub grab_all_data { + my $self = shift; + my $sto = Mgd::get_store(); + + # Normalize the domain data to the rest to simplify the differ. + # FIXME: Once new objects are swapped in, fix the original + my %dom = $sto->get_all_domains; + my @fixed_dom = (); + while (my ($name, $id) = each %dom) { + push(@fixed_dom, { namespace => $name, dmid => $id }); + } + my %ret = ( domain => \@fixed_dom, + class => [$sto->get_all_classes], + host => [$sto->get_all_hosts], + device => [$sto->get_all_devices], ); + return \%ret; +} + sub ua { my $self = shift; return $self->{ua} ||= LWP::UserAgent->new( @@ -98,6 +271,123 @@ sub ua { ); } +sub check_device2 { + my ($self, $dev, $ev) = @_; + + my $devid = $dev->id; + my $host = $dev->host; + + my $port = $host->http_port; + my $get_port = $host->http_get_port; # || $port; + my $hostip = $host->ip; + my $url = $dev->usage_url; + + $self->{seen_hosts}{$hostip} = 1; + + # now try to get the data with a short timeout + my $timeout = MogileFS::Config->config('conn_timeout') || 2; + my $start_time = Time::HiRes::time(); + + my $ua = $self->ua; + my $response = $ua->get($url); + my $res_time = Time::HiRes::time(); + + $hostip ||= 'unknown'; + $get_port ||= 'unknown'; + $devid ||= 'unknown'; + $timeout ||= 'unknown'; + $url ||= 'unknown'; + unless ($response->is_success) { + my $failed_after = $res_time - 
$start_time; + if ($failed_after < 0.5) { + $self->state_event('device', $dev->id, {observed_state => 'unreachable'}) + if (!$dev->observed_unreachable); + error("Port $get_port not listening on $hostip ($url)? Error was: " . $response->status_line); + } else { + $failed_after = sprintf("%.02f", $failed_after); + $self->state_event('host', $dev->hostid, {observed_state => 'unreachable'}) + if (!$host->observed_unreachable); + $self->{skip_host}{$dev->hostid} = 1; + error("Timeout contacting $hostip dev $devid ($url): took $failed_after seconds out of $timeout allowed"); + } + return; + } + + # at this point we can reach the host + $self->state_event('host', $dev->hostid, {observed_state => 'reachable'}) + if (!$host->observed_reachable); + $self->{iow}->restart_monitoring_if_needed($hostip); + + my %stats; + my $data = $response->content; + foreach (split(/\r?\n/, $data)) { + next unless /^(\w+)\s*:\s*(.+)$/; + $stats{$1} = $2; + } + + my ($used, $total) = ($stats{used}, $stats{total}); + unless ($used && $total) { + $used = "" unless defined $used; + $total = "" unless defined $total; + my $clen = length($data || ""); + error("dev$devid reports used = $used, total = $total, content-length: $clen, error?"); + return; + } + + # only update database every ~15 seconds per device + my $last_update = $self->{last_db_update}{$dev->id} || 0; + my $next_update = $last_update + UPDATE_DB_EVERY; + my $now = time(); + if ($now >= $next_update) { + Mgd::get_store()->update_device_usage(mb_total => int($total / 1024), + mb_used => int($used / 1024), + devid => $devid); + $self->{last_db_update}{$devid} = $now; + } + + # next if we're not going to try this now + return if ($self->{last_test_write}{$devid} || 0) + UPDATE_DB_EVERY > $now; + $self->{last_test_write}{$devid} = $now; + + # now we want to check if this device is writeable + + # first, create the test-write directory. 
this will return + # immediately after the first time, as the 'create_directory' + # function caches what it's already created. + $dev->create_directory("/dev$devid/test-write"); + + my $num = int(rand 100); # this was "$$-$now" before, but we don't yet have a cleaner in mogstored for these files + my $puturl = "http://$hostip:$port/dev$devid/test-write/test-write-$num"; + my $content = "time=$now rand=$num"; + my $req = HTTP::Request->new(PUT => $puturl); + $req->content($content); + + # TODO: guard against race-conditions with double-check on failure + + # now, depending on what happens + my $resp = $ua->request($req); + if ($resp->is_success) { + # now let's get it back to verify; note we use the get_port to verify that + # the distinction works (if we have one) + my $geturl = "http://$hostip:$get_port/dev$devid/test-write/test-write-$num"; + my $testwrite = $ua->get($geturl); + + # if success and the content matches, mark it writeable + if ($testwrite->is_success && $testwrite->content eq $content) { + $self->state_event('device', $devid, {observed_state => 'writeable'}) + if (!$dev->observed_writeable); + debug("dev$devid: used = $used, total = $total, writeable = 1"); + return; + } + } + + # if we fall through to here, then we know that something is not so good, so mark it readable + # which is guaranteed given we even tested writeability + $self->state_event('device', $devid, {observed_state => 'readable'}) + if (!$dev->observed_readable); + debug("dev$devid: used = $used, total = $total, writeable = 0"); +} + sub check_device { my ($self, $dev) = @_; diff --git a/t/01-domain-class.t b/t/01-domain-class.t index 69336da6..e3133d63 100644 --- a/t/01-domain-class.t +++ b/t/01-domain-class.t @@ -41,7 +41,7 @@ ok($domfac != $classfac, "factories are not the same singleton"); is($dom->name, 'toast', 'domain namespace is toast'); # Add in a test class. 
- my $cls = $classfac->set($dom, { classid => 1, dmid => 1, mindevcount => 3, + my $cls = $classfac->set({ classid => 1, dmid => 1, mindevcount => 3, replpolicy => '', classname => 'fried'}); ok($cls, "got a class object"); is($cls->id, 1, "class id is 1"); @@ -53,11 +53,11 @@ ok($domfac != $classfac, "factories are not the same singleton"); # Add a few more classes and domains. { my $dom2 = $domfac->set({ dmid => 2, namespace => 'harro' }); - $classfac->set($dom2, { classid => 1, dmid => 2, mindevcount => 2, + $classfac->set({ classid => 1, dmid => 2, mindevcount => 2, replpolicy => '', classname => 'red' }); - $classfac->set($dom2, { classid => 2, dmid => 2, mindevcount => 3, + $classfac->set({ classid => 2, dmid => 2, mindevcount => 3, replpolicy => 'MultipleHosts(2)', classname => 'green' }); - $classfac->set($dom2, { classid => 3, dmid => 2, mindevcount => 4, + $classfac->set({ classid => 3, dmid => 2, mindevcount => 4, replpolicy => 'MultipleHosts(5)', classname => 'blue' }); } From 047d309d57fb340ea60e8c0d5be6726a9a6cd8cc Mon Sep 17 00:00:00 2001 From: dormando Date: Wed, 25 May 2011 02:31:25 -0700 Subject: [PATCH 056/405] Use new objects for domain/class admin commands First test of the new APIs. t/00-startup.t passes. Required a patch to mogadm to stop running "get_domains" before every attempt. Code was redundant and get_domains is now more heavily cached. Strict admin commands talk to the DB directly. API flow uses the cached objects. 
--- lib/MogileFS/FID.pm | 2 +- lib/MogileFS/Server.pm | 16 +++++++ lib/MogileFS/Store.pm | 14 ++++++ lib/MogileFS/Worker/Monitor.pm | 9 ++-- lib/MogileFS/Worker/Query.pm | 79 ++++++++++++++++++---------------- 5 files changed, 80 insertions(+), 40 deletions(-) diff --git a/lib/MogileFS/FID.pm b/lib/MogileFS/FID.pm index fb58b2c7..61a004b4 100644 --- a/lib/MogileFS/FID.pm +++ b/lib/MogileFS/FID.pm @@ -189,7 +189,7 @@ sub devfids { # return FID's class sub class { my $self = shift; - return MogileFS::Class->of_fid($self); + return Mgd::class_factory()->get_by_id($self->dmid, $self->classid); } # Get reloaded the next time we're bothered. diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 2bb9de97..d7ef8d5c 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -267,6 +267,22 @@ sub set_store { $store_pid = $$; } +sub domain_factory { + return MogileFS::Factory::Domain->get_factory; +} + +sub class_factory { + return MogileFS::Factory::Class->get_factory; +} + +sub host_factory { + return MogileFS::Factory::Host->get_factory; +} + +sub device_factory { + return MogileFS::Factory::Device->get_factory; +} + # log stuff to syslog or the screen sub log { # simple logging functionality diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 32838414..21007153 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1179,6 +1179,13 @@ sub rename_file { return 1; } +sub get_domainid_by_name { + my $self = shift; + my ($dmid) = $self->dbh->selectrow_array('SELECT dmid FROM domain WHERE namespace = ?', + undef, $_[0]); + return $dmid; +} + # returns a hash of domains. Key is namespace, value is dmid. sub get_all_domains { my ($self) = @_; @@ -1186,6 +1193,13 @@ sub get_all_domains { return map { ($_->[0], $_->[1]) } @{$domains || []}; } +sub get_classid_by_name { + my $self = shift; + my ($classid) = $self->dbh->selectrow_array('SELECT classid FROM class WHERE dmid = ? 
AND classname = ?', + undef, $_[0], $_[1]); + return $classid; +} + # returns an array of hashrefs, one hashref per row in the 'class' table sub get_all_classes { my ($self) = @_; diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index ba7a68bd..b22a07ee 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -85,7 +85,7 @@ sub work { } $iow->set_hosts(keys %{$self->{seen_hosts}}); - $self->send_to_parent(":monitor_just_ran"); + #$self->send_to_parent(":monitor_just_ran"); # Make sure we sleep for at least 2.5 seconds before running again. # If there's a die above, the monitor will be restarted. @@ -110,7 +110,7 @@ sub work { $self->{prev_data} = $new_data; $self->send_events_to_parent; - Danga::Socket->AddTimer(10, $db_monitor); + Danga::Socket->AddTimer(4, $db_monitor); print STDERR "New monitor for db finished\n"; }; @@ -142,6 +142,8 @@ sub work { # Set the IOWatcher hosts (once old monitor code has been disabled) $self->send_events_to_parent; + + $self->send_to_parent(":monitor_just_ran"); Danga::Socket->AddTimer(2.5, $new_monitor); print STDERR "New monitor finished\n"; }; @@ -346,7 +348,8 @@ sub check_device2 { } # next if we're not going to try this now - return if ($self->{last_test_write}{$devid} || 0) + UPDATE_DB_EVERY > $now; + # FIXME: Uncomment this to throttle test writes again. 
+ #return if ($self->{last_test_write}{$devid} || 0) + UPDATE_DB_EVERY > $now; $self->{last_test_write}{$devid} = $now; # now we want to check if this device is writeable diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index c76584a6..92d29481 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -164,7 +164,7 @@ sub check_domain { return $self->err_line("no_domain") unless defined $domain && length $domain; # validate domain - my $dmid = MogileFS::Domain->id_of_name($domain) or + my $dmid = eval { Mgd::domain_factory()->get_by_name($domain)->id } or return $self->err_line("unreg_domain"); return $dmid; @@ -188,10 +188,7 @@ sub cmd_clear_cache { my MogileFS::Worker::Query $self = shift; my $args = shift; - MogileFS::Device->invalidate_cache if $args->{devices} || $args->{all}; - MogileFS::Host->invalidate_cache if $args->{hosts} || $args->{all}; - MogileFS::Class->invalidate_cache if $args->{class} || $args->{all}; - MogileFS::Domain->invalidate_cache if $args->{domain} || $args->{all}; + # TODO: Use this to tell Monitor worker to rebroadcast all state return $self->ok_line; } @@ -234,7 +231,7 @@ sub cmd_create_open { my $class = $args->{class} || ""; my $classid = 0; if (length($class)) { - $classid = MogileFS::Class->class_id($dmid, $class) + $classid = eval { Mgd::class_factory()->get_by_name($dmid, $class)->id } or return $self->err_line("unreg_class"); } @@ -464,7 +461,7 @@ sub cmd_updateclass { my $key = $args->{key} or return $self->err_line("no_key"); my $class = $args->{class} or return $self->err_line("no_class"); - my $classid = MogileFS::Class->class_id($dmid, $class) + my $classid = eval { Mgd::class_factory()->get_by_name($dmid, $class)->id } or return $self->err_line('class_not_found'); my $fid = MogileFS::FID->new_from_dmid_and_key($dmid, $key) @@ -533,9 +530,9 @@ sub cmd_file_debug { } if ($fid) { - $fid->{domain} = MogileFS::Domain->name_of_id($fid->{dmid}); - $fid->{class} = 
MogileFS::Class->class_name($fid->{dmid}, - $fid->{classid}); + $fid->{domain} = Mgd::domain_factory()->get_by_id->($fid->{dmid})->name; + $fid->{class} = Mgd::class_factory()->get_by_id->($fid->{dmid}, + $fid->{classid})->name; } # Fetch all of the queue data. @@ -589,8 +586,9 @@ sub cmd_file_info { my $ret = {}; $ret->{fid} = $fid->id; - $ret->{domain} = MogileFS::Domain->name_of_id($fid->dmid); - $ret->{class} = MogileFS::Class->class_name($fid->dmid, $fid->classid); + $ret->{domain} = Mgd::domain_factory->get_by_id($fid->dmid)->name; + $ret->{class} = Mgd::class_factory->get_by_id($fid->dmid, + $fid->classid)->name; $ret->{key} = $key; $ret->{'length'} = $fid->length; $ret->{devcount} = $fid->devcount; @@ -627,8 +625,10 @@ sub cmd_list_fids { $ct++; my $fid = $r->{fid}; $ret->{"fid_${ct}_fid"} = $fid; - $ret->{"fid_${ct}_domain"} = ($domains{$r->{dmid}} ||= MogileFS::Domain->name_of_id($r->{dmid})); - $ret->{"fid_${ct}_class"} = ($classes{$r->{dmid}}{$r->{classid}} ||= MogileFS::Class->class_name($r->{dmid}, $r->{classid})); + $ret->{"fid_${ct}_domain"} = ($domains{$r->{dmid}} ||= + Mgd::domain_factory->get_by_id($r->{dmid})->name); + $ret->{"fid_${ct}_class"} = ($classes{$r->{dmid}}{$r->{classid}} ||= + Mgd::class_factory->get_by_id($r->{dmid}, $r->{classid})->name); $ret->{"fid_${ct}_key"} = $r->{dkey}; $ret->{"fid_${ct}_length"} = $r->{length}; $ret->{"fid_${ct}_devcount"} = $r->{devcount}; @@ -782,9 +782,7 @@ sub cmd_create_domain { my $domain = $args->{domain} or return $self->err_line('no_domain'); - # TODO: auth/permissions? 
- - my $dom = eval { MogileFS::Domain->create($domain); }; + my $dom = eval { Mgd::get_store()->create_domain($domain); }; if ($@) { if (error_code($@) eq "dup") { return $self->err_line('domain_exists'); @@ -802,10 +800,11 @@ sub cmd_delete_domain { my $domain = $args->{domain} or return $self->err_line('no_domain'); - my $dom = MogileFS::Domain->of_namespace($domain) or + my $sto = Mgd::get_store(); + my $dmid = $sto->get_domainid_by_name($domain) or return $self->err_line('domain_not_found'); - if (eval { $dom->delete }) { + if (eval { $sto->delete_domain($dmid) }) { return $self->ok_line({ domain => $domain }); } @@ -835,20 +834,29 @@ sub cmd_create_class { return $self->err_line('invalid_replpolicy', $@) if $@; } - my $dom = MogileFS::Domain->of_namespace($domain) or + my $sto = Mgd::get_store(); + my $dmid = $sto->get_domainid_by_name($domain) or return $self->err_line('domain_not_found'); - my $cls = $dom->class($class); + my $clsid = $sto->get_classid_by_name($dmid, $class); if ($args->{update}) { - return $self->err_line('class_not_found') if ! $cls; - $cls->set_name($class); + return $self->err_line('class_not_found') if ! $clsid; + $sto->update_class_name(dmid => $dmid, classid => $clsid, + classname => $class); } else { - return $self->err_line('class_exists') if $cls; - $cls = $dom->create_class($class); + $clsid = eval { $sto->create_class($dmid, $class); }; + if ($@) { + if (error_code($@) eq "dup") { + return $self->err_line('class_exists'); + } + return $self->err_line('failure', "$@"); + } } - $cls->set_mindevcount($mindevcount); + $sto->update_class_mindevcount(dmid => $dmid, classid => $clsid, + mindevcount => $mindevcount); # don't erase an existing replpolicy if we're not setting a new one. 
- $cls->set_replpolicy($replpolicy) if $replpolicy; + $sto->update_class_replpolicy(dmid => $dmid, classid => $clsid, + replpolicy => $replpolicy) if $replpolicy; # return success return $self->ok_line({ class => $class, mindevcount => $mindevcount, domain => $domain }); @@ -871,12 +879,13 @@ sub cmd_delete_class { my $class = $args->{class}; return $self->err_line('no_class') unless length $domain; - my $dom = MogileFS::Domain->of_namespace($domain) or + my $sto = Mgd::get_store(); + my $dmid = $sto->get_domainid_by_name($domain) or return $self->err_line('domain_not_found'); - my $cls = $dom->class($class) or + my $clsid = $sto->get_classid_by_name($dmid, $class) or return $self->err_line('class_not_found'); - if (eval { $cls->delete }) { + if (eval { Mgd::get_store()->delete_class($dmid, $clsid) }) { return $self->ok_line({ domain => $domain, class => $class }); } @@ -960,11 +969,9 @@ sub cmd_get_domains { my MogileFS::Worker::Query $self = shift; my $args = shift; - MogileFS::Domain->invalidate_cache; - my $ret = {}; my $dm_n = 0; - foreach my $dom (MogileFS::Domain->domains) { + for my $dom (Mgd::domain_factory()->get_all) { $dm_n++; $ret->{"domain${dm_n}"} = $dom->name; my $cl_n = 0; @@ -1245,13 +1252,13 @@ sub cmd_edit_file { # Take first remaining device from list my $devid = $list[0]; - my $class = MogileFS::Class->of_fid($fid); + my $classid = $fid->classid; my $newfid = eval { Mgd::get_store()->register_tempfile( fid => undef, # undef => let the store pick a fid dmid => $dmid, key => $key, # This tempfile will ultimately become this key - classid => $class->classid, + classid => $classid, devids => $devid, ); }; @@ -1279,7 +1286,7 @@ sub cmd_edit_file { $ret->{newpath} = $paths[1]; $ret->{fid} = $newfid; $ret->{devid} = $devid; - $ret->{class} = $class->classid; + $ret->{class} = $classid; return $self->ok_line($ret); } From 9796f6442700fdaa71e5e330d7a84105b2409036 Mon Sep 17 00:00:00 2001 From: dormando Date: Wed, 25 May 2011 14:28:11 -0700 Subject: 
[PATCH 057/405] Convert MogileFS::Host calls to new objects --- lib/MogileFS/Store.pm | 15 ++++++++ lib/MogileFS/Worker/Query.pm | 75 ++++++++++++++++-------------------- 2 files changed, 48 insertions(+), 42 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 21007153..b0ab2b09 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1242,6 +1242,21 @@ sub remove_fidid_from_devid { return $rv; } +# Test if host exists. +sub get_hostid_by_id { + my $self = shift; + my ($hostid) = $self->dbh->selectrow_array('SELECT hostid FROM host WHERE hostid = ?', + undef, $_[0]); + return $hostid; +} + +sub get_hostid_by_name { + my $self = shift; + my ($hostid) = $self->dbh->selectrow_array('SELECT hostid FROM host WHERE hostname = ?', + undef, $_[0]); + return $hostid; +} + # get all hosts from database, returns them as list of hashrefs, hashrefs being the row contents. sub get_all_hosts { my ($self) = @_; diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 92d29481..d298c788 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -702,18 +702,15 @@ sub cmd_get_hosts { my MogileFS::Worker::Query $self = shift; my $args = shift; - MogileFS::Host->invalidate_cache; - my $ret = { hosts => 0 }; - foreach my $host (MogileFS::Host->hosts) { + for my $host (Mgd::host_factory()->get_all) { next if defined $args->{hostid} && $host->id != $args->{hostid}; my $n = ++$ret->{hosts}; - foreach my $key (qw(hostid status hostname hostip - http_port http_get_port - altip altmask)) - { + my $fields = $host->fields(qw(hostid status hostname hostip http_port + http_get_port altip altmask)); + while (my ($key, $val) = each %$fields) { # must be regular data so copy it in - $ret->{"host${n}_$key"} = $host->field($key); + $ret->{"host${n}_$key"} = $val; } } @@ -749,24 +746,20 @@ sub cmd_create_device { my $devid = $args->{devid}; return $self->err_line("invalid_devid") unless $devid && $devid =~ /^\d+$/; - my 
($host, $hostid); + my $hostid; - MogileFS::Host->check_cache; + my $sto = Mgd::get_store(); if ($args->{hostid} && $args->{hostid} =~ /^\d+$/) { - $hostid = $args->{hostid}; - $host = MogileFS::Host->of_hostid($hostid); - return $self->err_line("unknown_hostid") unless $host && $host->exists; + $hostid = $sto->get_hostid_by_id($args->{hostid}); + return $self->err_line("unknown_hostid") unless $hostid; } elsif (my $hname = $args->{hostname}) { - $host = MogileFS::Host->of_hostname($hname); - return $self->err_line("unknown_host") unless $host; - $hostid = $host->id; + $hostid = $sto->get_hostid_by_name($hname); + return $self->err_line("unknown_host") unless $hostid; } else { return $self->err_line("bad_args", "No hostid/hostname parameter"); } - if (eval { MogileFS::Device->create(devid => $devid, - hostid => $hostid, - status => $status) }) { + if (eval { $sto->create_device($devid, $hostid, $status) }) { return $self->ok_line; } @@ -901,14 +894,15 @@ sub cmd_create_host { my $hostname = $args->{host} or return $self->err_line('no_host'); - my $host = MogileFS::Host->of_hostname($hostname); + my $sto = Mgd::get_store(); + my $hostid = $sto->get_hostid_by_name($hostname); # if we're creating a new host, require ip/port, and default to # host being down if client didn't specify if ($args->{update}) { - return $self->err_line('host_not_found') unless $host; + return $self->err_line('host_not_found') unless $hostid; } else { - return $self->err_line('host_exists') if $host; + return $self->err_line('host_exists') if $hostid; return $self->err_line('no_ip') unless $args->{ip}; return $self->err_line('no_port') unless $args->{port}; $args->{status} ||= 'down'; @@ -916,26 +910,23 @@ sub cmd_create_host { if ($args->{status}) { return $self->err_line('unknown_state') - unless MogileFS::Host->valid_initial_state($args->{status}); + unless MogileFS::Host->valid_state($args->{status}); } # arguments all good, let's do it. 
- $host ||= MogileFS::Host->create($hostname, $args->{ip}); - my %setter = ( - status => "set_status", - ip => "set_ip", - port => "set_http_port", - getport => "set_http_get_port", - altip => "set_alt_ip", - altmask => "set_alt_mask", - ); - while (my ($f, $meth) = each %setter) { - $host->$meth($args->{$f}) if exists $args->{$f}; - } + $hostid ||= $sto->create_host($hostname, $args->{ip}); + + # Protocol mismatch data fixup. + $args->{hostip} = delete $args->{ip} if exists $args->{ip}; + $args->{http_port} = delete $args->{port} if exists $args->{port}; + $args->{http_get_port} = delete $args->{getport} if exists $args->{getport}; + my @toupdate = grep { exists $args->{$_} } qw(status hostip http_port + http_get_port altip altmask); + $sto->update_host($hostid, { map { $_ => $args->{$_} } @toupdate }); # return success - return $self->ok_line($host->overview_hashref); + return $self->ok_line({ hostid => $hostid, hostname => $hostname }); } sub cmd_update_host { @@ -950,17 +941,17 @@ sub cmd_delete_host { my MogileFS::Worker::Query $self = shift; my $args = shift; - my $host = MogileFS::Host->of_hostname($args->{host}) + my $sto = Mgd::get_store(); + my $hostid = $sto->get_hostid_by_name($args->{host}) or return $self->err_line('unknown_host'); - my $hostid = $host->id; - - foreach my $dev (MogileFS::Device->devices) { + # TODO: $sto->delete_host should have a "has_devices" test internally + for my $dev ($sto->get_all_devices) { return $self->err_line('host_not_empty') - if $dev->hostid == $hostid; + if $dev->{hostid} == $hostid; } - $host->delete; + $sto->delete_host($hostid); return $self->ok_line; } From 2162f992d9fd4c9723bcadcb23f97b9e59639469 Mon Sep 17 00:00:00 2001 From: dormando Date: Wed, 25 May 2011 17:26:44 -0700 Subject: [PATCH 058/405] Convert to new Device objects. Tests pass, and this is technically the last of it, but it's still scary. 
--- lib/MogileFS/DevFID.pm | 3 +-- lib/MogileFS/FID.pm | 4 ++-- lib/MogileFS/HTTPFile.pm | 2 +- lib/MogileFS/NewClass.pm | 4 ++++ lib/MogileFS/NewDevice.pm | 16 +++++++++++---- lib/MogileFS/NewDomain.pm | 4 ++++ lib/MogileFS/Rebalance.pm | 12 +++++------ lib/MogileFS/ReplicationRequest.pm | 2 +- lib/MogileFS/Worker/Delete.pm | 6 +++--- lib/MogileFS/Worker/Fsck.pm | 2 +- lib/MogileFS/Worker/JobMaster.pm | 2 +- lib/MogileFS/Worker/Monitor.pm | 4 ++-- lib/MogileFS/Worker/Query.pm | 32 +++++++++++++++++------------- lib/MogileFS/Worker/Reaper.pm | 2 +- lib/MogileFS/Worker/Replicate.pm | 12 +++++------ t/00-startup.t | 8 ++++++-- t/10-weighting.t | 4 +++- t/30-rebalance.t | 23 +++++++++++---------- 18 files changed, 84 insertions(+), 58 deletions(-) diff --git a/lib/MogileFS/DevFID.pm b/lib/MogileFS/DevFID.pm index 6211a905..e235e206 100644 --- a/lib/MogileFS/DevFID.pm +++ b/lib/MogileFS/DevFID.pm @@ -25,7 +25,7 @@ sub as_string { sub device { my $self = shift; - return $self->{dev} ||= MogileFS::Device->of_devid($self->{devid}); + return $self->{dev} ||= Mgd::device_factory()->get_by_id($self->{devid}); } sub fid { @@ -105,7 +105,6 @@ sub _make_full_url { # get some information we'll need my $dev = $self->device or return undef; my $host = $dev->host or return undef; - return undef unless $host->exists; my $path = $self->uri_path; my $hostip = $host->ip; diff --git a/lib/MogileFS/FID.pm b/lib/MogileFS/FID.pm index 61a004b4..1aa2d9e7 100644 --- a/lib/MogileFS/FID.pm +++ b/lib/MogileFS/FID.pm @@ -177,7 +177,7 @@ sub devids { sub devs { my $self = shift; - return map { MogileFS::Device->of_devid($_) } $self->devids; + return map { Mgd::device_factory()->get_by_id($_) } $self->devids; } sub devfids { @@ -207,7 +207,7 @@ sub devids_meet_policy { my $polobj = $cls->repl_policy_obj; - my $alldev = MogileFS::Device->map + my $alldev = Mgd::device_factory()->map_by_id or die "No global device map"; my @devs = $self->devs; diff --git a/lib/MogileFS/HTTPFile.pm 
b/lib/MogileFS/HTTPFile.pm index fe727eac..46564888 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -45,7 +45,7 @@ sub host_id { # return MogileFS::Device object sub device { my $self = shift; - return MogileFS::Device->of_devid($self->device_id); + return Mgd::device_factory()->get_by_id($self->device_id); } # return MogileFS::Host object diff --git a/lib/MogileFS/NewClass.pm b/lib/MogileFS/NewClass.pm index 059c78aa..ea567a51 100644 --- a/lib/MogileFS/NewClass.pm +++ b/lib/MogileFS/NewClass.pm @@ -52,4 +52,8 @@ sub has_files { return Mgd::get_store()->class_has_files($self->{dmid}, $self->id); } +sub observed_fields { + return {}; +} + 1; diff --git a/lib/MogileFS/NewDevice.pm b/lib/MogileFS/NewDevice.pm index ce88a370..5ae6dec7 100644 --- a/lib/MogileFS/NewDevice.pm +++ b/lib/MogileFS/NewDevice.pm @@ -39,6 +39,7 @@ sub new_from_args { # Instance methods sub id { return $_[0]{devid} } +sub devid { return $_[0]{devid} } sub name { return $_[0]{devid} } sub status { return $_[0]{status} } sub weight { return $_[0]{weight} } @@ -83,7 +84,8 @@ sub percent_full { sub fields { my $self = shift; my @tofetch = @_ ? @_ : @fields; - my $ret = { map { $_ => $self->{$_} } @tofetch }; + my $ret = { (map { $_ => $self->{$_} } @tofetch), + 'mb_free' => $self->mb_free }; return $ret; } @@ -155,7 +157,8 @@ sub should_get_new_files { sub mb_free { my $self = shift; - return $self->{mb_total} - $self->{mb_used}; + return $self->{mb_total} - $self->{mb_used} + if $self->{mb_total} && $self->{mb_used}; } sub mb_used { @@ -169,8 +172,8 @@ sub should_get_replicated_files { sub not_on_hosts { my ($self, @hosts) = @_; - my @hostids = map { ref($_) ? $_->hostid : $_ } @hosts; - my $my_hostid = $self->hostid; + my @hostids = map { ref($_) ? $_->id : $_ } @hosts; + my $my_hostid = $self->id; return (grep { $my_hostid == $_ } @hostids) ? 
0 : 1; } @@ -299,4 +302,9 @@ sub vivify_directories { $self->create_directory("/dev$devid/$p1/$p2/$p3"); } +# FIXME: Remove this once vestigial code is removed. +sub set_observed_utilization { + return 1; +} + 1; diff --git a/lib/MogileFS/NewDomain.pm b/lib/MogileFS/NewDomain.pm index 20116756..06902a60 100644 --- a/lib/MogileFS/NewDomain.pm +++ b/lib/MogileFS/NewDomain.pm @@ -38,4 +38,8 @@ sub class { return $self->{class_factory}->get_by_name($self, $_[0]); } +sub observed_fields { + return {}; +} + 1; diff --git a/lib/MogileFS/Rebalance.pm b/lib/MogileFS/Rebalance.pm index 39cbb70a..c936f044 100644 --- a/lib/MogileFS/Rebalance.pm +++ b/lib/MogileFS/Rebalance.pm @@ -97,7 +97,7 @@ sub stop { my $s = $self->{state}; my $sdev = $self->{sdev_current}; unless ($p->{leave_in_drain_mode}) { - MogileFS::Device->of_devid($sdev)->set_state('alive') if $sdev; + Mgd::get_store()->set_device_state($sdev, 'alive') if $sdev; } $s->{time_stopped} = time(); } @@ -211,7 +211,7 @@ sub next_fids_to_rebalance { # If we're not working against a source device, discover one my $sdev = $self->_find_source_device($state->{source_devs}); return undef unless $sdev; - $sdev = MogileFS::Device->of_devid($sdev); + $sdev = Mgd::device_factory()->get_by_id($sdev); my $filtered_destdevs = $self->filter_dest_devices($devs); croak("rebalance cannot find suitable destination devices") @@ -309,7 +309,7 @@ sub filter_source_devices { my @sdevs = (); for my $dev (@$devs) { - next unless $dev->exists && $dev->can_delete_from; + next unless $dev->can_delete_from; my $id = $dev->id; if (@{$policy->{from_devices}}) { next unless grep { $_ == $id } @{$policy->{from_devices}}; @@ -361,7 +361,7 @@ sub _finish_source_device { # Unless the user wants a device to never get new files again (sticking in # drain mode), return to alive. 
unless ($policy->{leave_in_drain_mode}) { - MogileFS::Device->of_devid($sdev)->set_state('alive'); + Mgd::get_store()->set_device_state($sdev, 'alive'); } push @{$state->{completed_devs}}, $sdev; } @@ -392,7 +392,7 @@ sub _find_source_device { } } # Must mark device in "drain" mode while we work on it. - MogileFS::Device->of_devid($sdev)->set_state('drain'); + Mgd::get_store()->set_device_state($sdev, 'drain'); $state->{sdev_limit} = $limit; } @@ -434,7 +434,7 @@ sub filter_dest_devices { my @ddevs = (); for my $dev (@devs) { - next unless $dev->exists && $dev->should_get_new_files; + next unless $dev->should_get_new_files; my $id = $dev->id; my $hostid = $dev->hostid; diff --git a/lib/MogileFS/ReplicationRequest.pm b/lib/MogileFS/ReplicationRequest.pm index 625df014..373d6656 100644 --- a/lib/MogileFS/ReplicationRequest.pm +++ b/lib/MogileFS/ReplicationRequest.pm @@ -26,7 +26,7 @@ sub rr_upgrade { # for ideal replications sub replicate_to { my ($class, @devs) = @_; - @devs = map { ref $_ ? $_ : MogileFS::Device->of_devid($_) } @devs; + @devs = map { ref $_ ? $_ : Mgd::device_factory()->get_by_id($_) } @devs; return bless { ideal_next => \@devs, }, $class; diff --git a/lib/MogileFS/Worker/Delete.pm b/lib/MogileFS/Worker/Delete.pm index a838e96f..de530ce8 100644 --- a/lib/MogileFS/Worker/Delete.pm +++ b/lib/MogileFS/Worker/Delete.pm @@ -162,9 +162,9 @@ sub process_deletes2 { for my $devid (@devids) { - my $dev = $devid ? MogileFS::Device->of_devid($devid) : undef; + my $dev = $devid ? Mgd::device_factory()->get_by_id($devid) : undef; error("deleting fid $fidid, on devid ".($devid || 'NULL')."...") if $Mgd::DEBUG >= 2; - unless ($dev && $dev->exists) { + unless ($dev) { next; } if ($dev->dstate->is_perm_dead) { @@ -310,7 +310,7 @@ sub process_deletes { # CASE: devid is marked dead or doesn't exist: consider it deleted on this devid. # (Note: we're tolerant of '0' as a devid, due to old buggy version which # would sometimes put that in there) - my $dev = $devid ? 
MogileFS::Device->of_devid($devid) : undef; + my $dev = $devid ? Mgd::device_factory()->get_by_id($devid) : undef; unless ($dev && $dev->exists) { $done_with_devid->("devid_doesnt_exist"); next; diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index dc9f4619..21739e37 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -320,7 +320,7 @@ sub fix_fid { @dfids = List::Util::shuffle( map { MogileFS::DevFID->new($_, $fid) } grep { $_->dstate->should_fsck_search_on } - MogileFS::Device->devices + Mgd::device_factory()->get_all ); $check_dfids->("desperate"); diff --git a/lib/MogileFS/Worker/JobMaster.pm b/lib/MogileFS/Worker/JobMaster.pm index b9c2f665..00a5100f 100644 --- a/lib/MogileFS/Worker/JobMaster.pm +++ b/lib/MogileFS/Worker/JobMaster.pm @@ -211,7 +211,7 @@ sub _inject_rebalance_queues { my $rebal_state = MogileFS::Config->server_setting('rebal_state'); $rebal->policy($rebal_pol); - my @devs = MogileFS::Device->devices; + my @devs = Mgd::device_factory()->get_all; if ($rebal_state) { $rebal->load_state($rebal_state); } else { diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index b22a07ee..f76d4935 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -57,8 +57,8 @@ sub work { # This does hit the DB every time a device does not exist, so # perhaps should add negative caching in the future. 
$self->{devutil}->{cur}->{$devid} = $util; - my $dev = MogileFS::Device->of_devid($devid); - next unless $dev->exists; + my $dev = Mgd::device_factory()->get_by_id($devid); + next unless $dev; $dev->set_observed_utilization($util); } }); diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index d298c788..c4c5fe05 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -245,8 +245,8 @@ sub cmd_create_open { my @devices; - unless (MogileFS::run_global_hook('cmd_create_open_order_devices', [MogileFS::Device->devices], \@devices)) { - @devices = sort_devs_by_freespace(MogileFS::Device->devices); + unless (MogileFS::run_global_hook('cmd_create_open_order_devices', [Mgd::device_factory()->get_all], \@devices)) { + @devices = sort_devs_by_freespace(Mgd::device_factory()->get_all); } # find suitable device(s) to put this file on. @@ -327,7 +327,6 @@ sub sort_devs_by_freespace { } sort { $b->percent_free <=> $a->percent_free; } grep { - $_->exists && $_->should_get_new_files; } @_; @@ -722,11 +721,11 @@ sub cmd_get_devices { my $args = shift; my $ret = { devices => 0 }; - foreach my $dev (MogileFS::Device->devices) { + for my $dev (Mgd::device_factory()->get_all) { next if defined $args->{devid} && $dev->id != $args->{devid}; my $n = ++$ret->{devices}; - my $sum = $dev->overview_hashref; + my $sum = $dev->fields; while (my ($key, $val) = each %$sum) { $ret->{"dev${n}_$key"} = $val; } @@ -1032,7 +1031,7 @@ sub cmd_get_paths { # add to memcache, if needed. for an hour. $memc->add($mogfid_memkey, $fid->id, 3600) if $need_fid_in_memcache; - my $dmap = MogileFS::Device->map; + my $dmap = Mgd::device_factory()->map_by_id; my $ret = { paths => 0, @@ -1187,7 +1186,7 @@ sub cmd_edit_file { # add to memcache, if needed. for an hour. 
$memc->add($mogfid_memkey, $fid->id, 3600) if $need_fid_in_memcache; - my $dmap = MogileFS::Device->map; + my $dmap = Mgd::device_factory()->map_by_id; my @devices_with_weights; @@ -1290,8 +1289,10 @@ sub cmd_set_weight { return $self->err_line('bad_params') unless $hostname && $devid && $weight >= 0; - my $dev = MogileFS::Device->from_devid_and_hostname($devid, $hostname) - or return $self->err_line('host_mismatch'); + my $dev = Mgd::device_factory()->get_by_id($devid); + return $self->err_line('no_device') unless $dev; + return $self->err_line('host_mismatch') + unless $dev->host->hostname eq $hostname; $dev->set_weight($weight); @@ -1309,14 +1310,16 @@ sub cmd_set_state { return $self->err_line('bad_params') unless $hostname && $devid && $dstate; - my $dev = MogileFS::Device->from_devid_and_hostname($devid, $hostname) - or return $self->err_line('host_mismatch'); + my $dev = Mgd::device_factory()->get_by_id($devid); + return $self->err_line('no_device') unless $dev; + return $self->err_line('host_mismatch') + unless $dev->host->hostname eq $hostname; # make sure the destination state isn't too high return $self->err_line('state_too_high') unless $dev->can_change_to_state($state); - $dev->set_state($state); + Mgd::get_store()->set_device_state($dev->id, $state); return $self->ok_line; } @@ -1561,7 +1564,7 @@ sub cmd_rebalance_start { my $rebal = MogileFS::Rebalance->new; $rebal->policy($rebal_pol); - my @devs = MogileFS::Device->devices; + my @devs = Mgd::device_factory()->get_all; $rebal->init(\@devs); my $sdevs = $rebal->source_devices; @@ -1580,7 +1583,7 @@ sub cmd_rebalance_test { return $self->err_line('no_rebal_policy') unless $rebal_pol; my $rebal = MogileFS::Rebalance->new; - my @devs = MogileFS::Device->devices; + my @devs = Mgd::device_factory()->get_all; $rebal->policy($rebal_pol); $rebal->init(\@devs); @@ -1683,6 +1686,7 @@ sub err_line { 'key_exists' => "Target key name already exists; can't overwrite.", 'no_class' => "No class provided", 
'no_devices' => "No devices found to store file", + 'no_device' => "Device not found", 'no_domain' => "No domain provided", 'no_host' => "No host provided", 'no_ip' => "IP required to create host", diff --git a/lib/MogileFS/Worker/Reaper.pm b/lib/MogileFS/Worker/Reaper.pm index c2071d55..240bbf22 100644 --- a/lib/MogileFS/Worker/Reaper.pm +++ b/lib/MogileFS/Worker/Reaper.pm @@ -28,7 +28,7 @@ sub work { debug("Reaper running; looking for dead devices"); foreach my $dev (grep { $_->dstate->is_perm_dead } - MogileFS::Device->devices) + Mgd::device_factory()->get_all) { my $devid = $dev->id; next if $all_empty{$devid}; diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index 4f6fad6c..855867f9 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -181,7 +181,7 @@ sub replicate_using_torepl_table { my $devfid; # First one we can delete from, we try to rebalance away from. for (@devs) { - my $dev = MogileFS::Device->of_devid($_); + my $dev = Mgd::device_factory()->get_by_id($_); # Not positive 'can_read_from' needs to be here. # We must be able to delete off of this dev so the fid can # move. @@ -361,7 +361,7 @@ sub replicate { }; # hashref of devid -> MogileFS::Device - my $devs = MogileFS::Device->map + my $devs = Mgd::device_factory()->map_by_id or die "No device map"; return $retunlock->(0, "failed_getting_lock", "Unable to obtain lock for fid $fidid") @@ -380,7 +380,7 @@ sub replicate { my @on_up_devid; # subset of @on_devs: just devs that are readable foreach my $devid ($fid->devids) { - my $d = MogileFS::Device->of_devid($devid) + my $d = Mgd::device_factory()->get_by_id($devid) or next; push @on_devs, $d; if ($d->dstate->should_have_files && ! 
$mask_devids->{$devid}) { @@ -571,11 +571,11 @@ sub http_copy { }; # get some information we'll need - my $sdev = MogileFS::Device->of_devid($sdevid); - my $ddev = MogileFS::Device->of_devid($ddevid); + my $sdev = Mgd::device_factory()->get_by_id($sdevid); + my $ddev = Mgd::device_factory()->get_by_id($ddevid); return error("Error: unable to get device information: source=$sdevid, destination=$ddevid, fid=$fid") - unless $sdev && $ddev && $sdev->exists && $ddev->exists; + unless $sdev && $ddev; my $s_dfid = MogileFS::DevFID->new($sdev, $fid); my $d_dfid = MogileFS::DevFID->new($ddev, $fid); diff --git a/t/00-startup.t b/t/00-startup.t index eac15a65..90b632b0 100644 --- a/t/00-startup.t +++ b/t/00-startup.t @@ -20,7 +20,7 @@ find_mogclient_or_skip(); my $sto = eval { temp_store(); }; if ($sto) { - plan tests => 72; + plan tests => 73; } else { plan skip_all => "Can't create temporary test database: $@"; exit 0; @@ -108,6 +108,8 @@ ok($tmptrack->mogadm("device", "add", "hostB", 4), "created dev4 on hostB"); { my $was = $be->{timeout}; # can't use local on phash :( $be->{timeout} = 10; + ok($be->do_request("do_monitor_round", {}), "waited for monitor") + or die "Failed to wait for monitor"; ok($be->do_request("do_monitor_round", {}), "waited for monitor") or die "Failed to wait for monitor"; $be->{timeout} = $was; @@ -115,11 +117,13 @@ ok($tmptrack->mogadm("device", "add", "hostB", 4), "created dev4 on hostB"); { my $fh = $mogc->new_file('no_content', "2copies"); + die "Error: " . $mogc->errstr unless $fh; ok(close($fh), "closed file"); } { my $fh = $mogc->new_file('no_content', "2copies"); + die "Error: " . 
$mogc->errstr unless $fh; ok(close($fh), "closed file"); } @@ -273,7 +277,7 @@ ok($tmptrack->mogadm("device", "add", "hostC", 5), "created dev5 on hostC"); ok($tmptrack->mogadm("device", "add", "hostC", 6), "created dev6 on hostC"); # let it be discovered -sleep(3); # FIXME: make an explicit "rescan" or "remonitor" job to mogilefsd, just for test suite +sleep(5); # FIXME: make an explicit "rescan" or "remonitor" job to mogilefsd, just for test suite ok($tmptrack->mogadm("device", "mark", "hostB", 3, "dead"), "marked device B/3 dead"); ok($tmptrack->mogadm("device", "mark", "hostB", 4, "dead"), "marked device B/4 dead"); diff --git a/t/10-weighting.t b/t/10-weighting.t index 3bee554d..5f7c41f5 100644 --- a/t/10-weighting.t +++ b/t/10-weighting.t @@ -27,7 +27,7 @@ find_mogclient_or_skip(); my $sto = eval { temp_store(); }; if ($sto) { - plan tests => 16; + plan tests => 17; } else { plan skip_all => "Can't create temporary test database: $@"; exit 0; @@ -80,6 +80,8 @@ ok($tmptrack->mogadm("device", "add", "hostB", 2), "created dev2 on hostB"); { my $was = $be->{timeout}; # can't use local on phash :( $be->{timeout} = 10; + ok($be->do_request("do_monitor_round", {}), "waited for monitor") + or die "Failed to wait for monitor"; ok($be->do_request("do_monitor_round", {}), "waited for monitor") or die "Failed to wait for monitor"; $be->{timeout} = $was; diff --git a/t/30-rebalance.t b/t/30-rebalance.t index 990fa435..190398cc 100644 --- a/t/30-rebalance.t +++ b/t/30-rebalance.t @@ -13,7 +13,7 @@ find_mogclient_or_skip(); my $sto = eval { temp_store(); }; if ($sto) { - plan tests => 53; + plan tests => 48; } else { plan skip_all => "Can't create temporary test database: $@"; exit 0; @@ -78,17 +78,12 @@ ok($tmptrack->mogadm("host", "add", "hostC", "--ip=$hostC_ip", "--status=alive") ok($tmptrack->mogadm("device", "add", "hostC", 5), "created dev5 on hostC"); ok($tmptrack->mogadm("device", "add", "hostC", 6), "created dev6 on hostC"); -ok($tmptrack->mogadm("device", "mark", 
"hostA", 1, "alive"), "dev1 alive"); -ok($tmptrack->mogadm("device", "mark", "hostA", 2, "alive"), "dev2 alive"); -ok($tmptrack->mogadm("device", "mark", "hostB", 3, "alive"), "dev3 alive"); -ok($tmptrack->mogadm("device", "mark", "hostB", 4, "alive"), "dev4 alive"); -ok($tmptrack->mogadm("device", "mark", "hostC", 5, "alive"), "dev5 alive"); -ok($tmptrack->mogadm("device", "mark", "hostC", 6, "alive"), "dev6 alive"); - # wait for monitor { my $was = $be->{timeout}; # can't use local on phash :( $be->{timeout} = 10; + ok($be->do_request("do_monitor_round", {}), "waited for monitor") + or die "Failed to wait for monitor"; ok($be->do_request("do_monitor_round", {}), "waited for monitor") or die "Failed to wait for monitor"; $be->{timeout} = $was; @@ -127,15 +122,21 @@ use MogileFS::Device; use MogileFS::Host; use MogileFS::Config; use MogileFS::Rebalance; +use MogileFS::Factory::Host; +use MogileFS::Factory::Device; use Data::Dumper qw/Dumper/; -my @devs = MogileFS::Device->devices; -my @hosts = MogileFS::Host->hosts; +my $dfac = MogileFS::Factory::Device->get_factory; +my $hfac = MogileFS::Factory::Host->get_factory; + +map { $hfac->set($_) } $sto->get_all_hosts; +map { $dfac->set($_) } $sto->get_all_devices; +my @devs = $dfac->get_all; ### Hacks to make tests work :/ $MogileFS::Config::skipconfig = 1; MogileFS::Config->load_config; -for my $h (@hosts) { +for my $h ($hfac->get_all) { print "hostid: ", $h->id, " name: ", $h->hostname, "\n"; $h->{observed_state} = "reachable"; } From 3db8a84930417ded2d3ed73a2c6bf8aeb8da4edf Mon Sep 17 00:00:00 2001 From: dormando Date: Wed, 25 May 2011 19:03:21 -0700 Subject: [PATCH 059/405] Remove old state management code First of several code nukes. Monitor worker should say "I have been run" if it's diffed the DB side. All broadcast statements and invalidation commands were removed. 
--- lib/MogileFS/HTTPFile.pm | 6 -- lib/MogileFS/ProcManager.pm | 27 ----- lib/MogileFS/Worker.pm | 63 ------------ lib/MogileFS/Worker/Delete.pm | 1 - lib/MogileFS/Worker/Monitor.pm | 174 +++------------------------------ 5 files changed, 16 insertions(+), 255 deletions(-) diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index 46564888..bc2fec42 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -239,17 +239,11 @@ sub size { # did we timeout? unless (wait_for_writeability(fileno($httpsock), $time_remain)) { - if (my $worker = MogileFS::ProcManager->is_child) { - $worker->broadcast_host_unreachable($self->host_id); - } return undeferr("get_file_size() connect timeout for HTTP HEAD for size of $path"); } # did we fail to connect? (got a RST, etc) unless (getpeername($httpsock)) { - if (my $worker = MogileFS::ProcManager->is_child) { - $worker->broadcast_device_unreachable($self->device_id); - } return undeferr("get_file_size() connect failure for HTTP HEAD for size of $path"); } diff --git a/lib/MogileFS/ProcManager.pm b/lib/MogileFS/ProcManager.pm index 224e1a02..f47595ee 100644 --- a/lib/MogileFS/ProcManager.pm +++ b/lib/MogileFS/ProcManager.pm @@ -627,10 +627,6 @@ sub HandleChildRequest { # pass it on to our error handler, prefaced with the child's job Mgd::debug("[" . $child->job . "(" . $child->pid . ")] $1"); - } elsif ($cmd =~ /^:state_change (\w+) (\d+) (\w+)/) { - my ($what, $whatid, $state) = ($1, $2, $3); - state_change($what, $whatid, $state, $child); - } elsif ($cmd =~ /^queue_depth (\w+)/) { my $job = $1; if ($job eq 'all') { @@ -707,14 +703,6 @@ sub HandleChildRequest { # and this will rebroadcast it to all other children # (including the one that just set it to us, but eh) MogileFS::Config->set_config($1, $2); - } elsif (my ($devid, $util) = $cmd =~ /^:set_dev_utilization (\d+) (.+)/) { - $dev_util{$devid} = $util; - - # time to rebroadcast dev utilization messages to all children? 
- if ($nowish > $last_util_spray + 3) { - $last_util_spray = $nowish; - MogileFS::ProcManager->send_to_all_children(":set_dev_utilization " . join(" ", %dev_util)); - } } else { # unknown command my $show = $cmd; @@ -799,21 +787,6 @@ sub is_child { return $IsChild; } -sub state_change { - my ($what, $whatid, $state, $exclude) = @_; - my $key = "$what-$whatid"; - my $now = time(); - foreach my $child (values %child) { - my $old = $child->{known_state}{$key} || ""; - if (!$old || $old->[1] ne $state || $old->[0] < $now - 300) { - $child->{known_state}{$key} = [$now, $state]; - - $child->write(":state_change $what $whatid $state\r\n") - unless $exclude && $child == $exclude; - } - } -} - sub wake_a { my ($pkg, $class, $fromchild) = @_; # from arg is optional (which child sent it) my $child = MogileFS::ProcManager->is_child; diff --git a/lib/MogileFS/Worker.pm b/lib/MogileFS/Worker.pm index 75a15345..93375c4a 100644 --- a/lib/MogileFS/Worker.pm +++ b/lib/MogileFS/Worker.pm @@ -188,39 +188,6 @@ sub parent_ping { } } -sub broadcast_device_writeable { - $_[0]->_broadcast_state("device", $_[1], "writeable"); -} -sub broadcast_device_readable { - $_[0]->_broadcast_state("device", $_[1], "readable"); -} -sub broadcast_device_unreachable { - $_[0]->_broadcast_state("device", $_[1], "unreachable"); -} -sub broadcast_host_reachable { - $_[0]->_broadcast_state("host", $_[1], "reachable"); -} -sub broadcast_host_unreachable { - $_[0]->_broadcast_state("host", $_[1], "unreachable"); -} - -sub _broadcast_state { - my ($self, $what, $whatid, $state) = @_; - if ($what eq "host") { - MogileFS::Host->of_hostid($whatid)->set_observed_state($state); - } elsif ($what eq "device") { - MogileFS::Device->of_devid($whatid)->set_observed_state($state); - } - my $key = "$what-$whatid"; - my $laststate = $self->{last_bcast_state}{$key}; - my $now = time(); - # broadcast on initial discovery, state change, and every 10 seconds - if (!$laststate || $laststate->[1] ne $state || $laststate->[0] < 
$now - 10) { - $self->send_to_parent(":state_change $what $whatid $state"); - $self->{last_bcast_state}{$key} = [$now, $state]; - } -} - sub invalidate_meta { my ($self, $what) = @_; return if $Mgd::INVALIDATE_NO_PROPOGATE; # anti recursion @@ -235,16 +202,6 @@ sub process_generic_command { my ($self, $lineref) = @_; return 0 unless $$lineref =~ /^:/; # all generic commands start with colon - if ($$lineref =~ /^:state_change (\w+) (\d+) (\w+)/) { - my ($what, $whatid, $state) = ($1, $2, $3); - if ($what eq "host") { - MogileFS::Host->of_hostid($whatid)->set_observed_state($state); - } elsif ($what eq "device") { - MogileFS::Device->of_devid($whatid)->set_observed_state($state); - } - return 1; - } - if ($$lineref =~ /^:shutdown/) { $$got_live_vs_die = 1 if $got_live_vs_die; exit 0; @@ -255,14 +212,6 @@ sub process_generic_command { return 1; } - if ($$lineref =~ /^:invalidate_meta_once (\w+)/) { - local $Mgd::INVALIDATE_NO_PROPOGATE = 1; - # where $1 is one of {"domain", "device", "host", "class"} - my $class = "MogileFS::" . 
ucfirst(lc($1)); - $class->invalidate_cache; - return 1; - } - if ($$lineref =~ /^:monitor_events/) { apply_state_events($lineref); return 1; @@ -284,18 +233,6 @@ sub process_generic_command { return 1; } - # :set_dev_utilization dev# 45.2 dev# 45.2 dev# 45.2 dev# 45.2 dev 45.2\n - # (dev#, utilz%)+ - if (my ($devid, $util) = $$lineref =~ /^:set_dev_utilization (.+)/) { - my %pairs = split(/\s+/, $1); - local $MogileFS::Device::util_no_broadcast = 1; - while (my ($devid, $util) = each %pairs) { - my $dev = eval { MogileFS::Device->of_devid($devid) } or next; - $dev->set_observed_utilization($util); - } - return 1; - } - # queue_name depth if ($$lineref =~ /^:queue_depth (\w+) (\d+)/) { $self->queue_depth($1, $2); diff --git a/lib/MogileFS/Worker/Delete.pm b/lib/MogileFS/Worker/Delete.pm index de530ce8..9eb2def7 100644 --- a/lib/MogileFS/Worker/Delete.pm +++ b/lib/MogileFS/Worker/Delete.pm @@ -352,7 +352,6 @@ sub process_deletes { Timeout => 2); unless ($sock) { # timeout or something, mark this device as down for now and move on - $self->broadcast_host_unreachable($dev->hostid); $reschedule_fid->(60 * 60 * 2, "no_sock_to_hostid"); next; } diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index f76d4935..a3843c0c 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -63,41 +63,10 @@ sub work { } }); - my $main_monitor; - $main_monitor = sub { - $self->parent_ping; - - # get db and note we're starting a run - debug("Monitor running; scanning usage files"); - $self->validate_dbh; - - $self->{skip_host} = {}; # hostid -> 1 if already noted dead. 
- $self->{seen_hosts} = {}; # IP -> 1 - - # now iterate over devices - MogileFS::Device->invalidate_cache; - MogileFS::Host->invalidate_cache; - - foreach my $dev (MogileFS::Device->devices) { - next unless $dev->dstate->should_monitor; - next if $self->{skip_host}{$dev->hostid}; - $self->check_device($dev); - } - - $iow->set_hosts(keys %{$self->{seen_hosts}}); - #$self->send_to_parent(":monitor_just_ran"); - - # Make sure we sleep for at least 2.5 seconds before running again. - # If there's a die above, the monitor will be restarted. - Danga::Socket->AddTimer(2.5, $main_monitor); - }; - - $main_monitor->(); - my $db_monitor; $db_monitor = sub { $self->parent_ping; - print STDERR "New monitor for db data running\n"; + debug("Monitor running; checking DB for updates"); $self->validate_dbh; my $new_data = {}; @@ -110,32 +79,34 @@ sub work { $self->{prev_data} = $new_data; $self->send_events_to_parent; + $self->send_to_parent(":monitor_just_ran"); Danga::Socket->AddTimer(4, $db_monitor); - print STDERR "New monitor for db finished\n"; }; $db_monitor->(); - # FIXME: Add a "read_from_parent" to ensure we pick up the response for - # populating the factories? - #$self->read_from_parent; + $self->read_from_parent; - my $new_monitor; - $new_monitor = sub { + my $main_monitor; + $main_monitor = sub { $self->parent_ping; - print STDERR "New monitor running\n"; + debug("Monitor running; scanning usage files"); $self->validate_dbh; + $self->{skip_host} = {}; # hostid -> 1 if already noted dead. + $self->{seen_hosts} = {}; # IP -> 1 + my $dev_factory = MogileFS::Factory::Device->get_factory(); my $cur_iow = {}; my @events = (); - # Run check_devices2 to test host/devs. diff against old values. + # Run check_devices to test host/devs. diff against old values. 
for my $dev ($dev_factory->get_all) { if (my $state = $self->is_iow_diff($dev)) { $self->state_event('device', $dev->id, {utilization => $state}); } $cur_iow->{$dev->id} = $self->{devutil}->{cur}->{$dev->id}; - $self->check_device2($dev, \@events); + next if $self->{skip_host}{$dev->hostid}; + $self->check_device($dev, \@events); } $self->{devutil}->{prev} = $cur_iow; @@ -143,12 +114,11 @@ sub work { $self->send_events_to_parent; - $self->send_to_parent(":monitor_just_ran"); - Danga::Socket->AddTimer(2.5, $new_monitor); - print STDERR "New monitor finished\n"; + $iow->set_hosts(keys %{$self->{seen_hosts}}); + Danga::Socket->AddTimer(2.5, $main_monitor); }; - $new_monitor->(); + $main_monitor->(); Danga::Socket->EventLoop; } @@ -273,7 +243,7 @@ sub ua { ); } -sub check_device2 { +sub check_device { my ($self, $dev, $ev) = @_; my $devid = $dev->id; @@ -391,118 +361,6 @@ sub check_device2 { debug("dev$devid: used = $used, total = $total, writeable = 0"); } -sub check_device { - my ($self, $dev) = @_; - - my $devid = $dev->id; - my $host = $dev->host; - - my $port = $host->http_port; - my $get_port = $host->http_get_port; # || $port; - my $hostip = $host->ip; - my $url = $dev->usage_url; - - $self->{seen_hosts}{$hostip} = 1; - - # now try to get the data with a short timeout - my $timeout = MogileFS::Config->config('conn_timeout') || 2; - my $start_time = Time::HiRes::time(); - - my $ua = $self->ua; - my $response = $ua->get($url); - my $res_time = Time::HiRes::time(); - - $hostip ||= 'unknown'; - $get_port ||= 'unknown'; - $devid ||= 'unknown'; - $timeout ||= 'unknown'; - $url ||= 'unknown'; - unless ($response->is_success) { - my $failed_after = $res_time - $start_time; - if ($failed_after < 0.5) { - $self->broadcast_device_unreachable($dev->id); - error("Port $get_port not listening on $hostip ($url)? Error was: " . 
$response->status_line); - } else { - $failed_after = sprintf("%.02f", $failed_after); - $self->broadcast_host_unreachable($dev->hostid); - $self->{skip_host}{$dev->hostid} = 1; - error("Timeout contacting $hostip dev $devid ($url): took $failed_after seconds out of $timeout allowed"); - } - return; - } - - # at this point we can reach the host - $self->broadcast_host_reachable($dev->hostid); - $self->{iow}->restart_monitoring_if_needed($hostip); - - my %stats; - my $data = $response->content; - foreach (split(/\r?\n/, $data)) { - next unless /^(\w+)\s*:\s*(.+)$/; - $stats{$1} = $2; - } - - my ($used, $total) = ($stats{used}, $stats{total}); - unless ($used && $total) { - $used = "" unless defined $used; - $total = "" unless defined $total; - my $clen = length($data || ""); - error("dev$devid reports used = $used, total = $total, content-length: $clen, error?"); - return; - } - - # only update database every ~15 seconds per device - my $last_update = $self->{last_db_update}{$dev->id} || 0; - my $next_update = $last_update + UPDATE_DB_EVERY; - my $now = time(); - if ($now >= $next_update) { - Mgd::get_store()->update_device_usage(mb_total => int($total / 1024), - mb_used => int($used / 1024), - devid => $devid); - $self->{last_db_update}{$devid} = $now; - } - - # next if we're not going to try this now - return if ($self->{last_test_write}{$devid} || 0) + UPDATE_DB_EVERY > $now; - $self->{last_test_write}{$devid} = $now; - - # now we want to check if this device is writeable - - # first, create the test-write directory. this will return - # immediately after the first time, as the 'create_directory' - # function caches what it's already created. 
- $dev->create_directory("/dev$devid/test-write"); - - my $num = int(rand 100); # this was "$$-$now" before, but we don't yet have a cleaner in mogstored for these files - my $puturl = "http://$hostip:$port/dev$devid/test-write/test-write-$num"; - my $content = "time=$now rand=$num"; - my $req = HTTP::Request->new(PUT => $puturl); - $req->content($content); - - # TODO: guard against race-conditions with double-check on failure - - # now, depending on what happens - my $resp = $ua->request($req); - if ($resp->is_success) { - # now let's get it back to verify; note we use the get_port to verify that - # the distinction works (if we have one) - my $geturl = "http://$hostip:$get_port/dev$devid/test-write/test-write-$num"; - my $testwrite = $ua->get($geturl); - - # if success and the content matches, mark it writeable - if ($testwrite->is_success && $testwrite->content eq $content) { - $self->broadcast_device_writeable($devid); - debug("dev$devid: used = $used, total = $total, writeable = 1"); - return; - } - } - - # if we fall through to here, then we know that something is not so good, so mark it readable - # which is guaranteed given we even tested writeability - $self->broadcast_device_readable($devid); - debug("dev$devid: used = $used, total = $total, writeable = 0"); -} - 1; # Local Variables: From ebf8a5a8dc9b4452671f7816b99583bf7934e9b1 Mon Sep 17 00:00:00 2001 From: dormando Date: Wed, 25 May 2011 19:28:38 -0700 Subject: [PATCH 060/405] Mass nuke unused code and fix most tests t/multiple-hosts-replpol.t needs more work before it passes again. Kindly noting that FSCK and a few things don't have coverage. fix replpolicy tests Restore default class "0" Also fix calls from mogfileinfo and mogfiledebug Clean some vestigial Monitor worker code Fix diff_hash. Splitting the tests out into one-per-line so we can get better errors if it ever fails again, also easier to read. 
--- lib/MogileFS/Class.pm | 246 +++--------------- lib/MogileFS/DevFID.pm | 2 +- lib/MogileFS/Device.pm | 415 ++++++++----------------------- lib/MogileFS/Domain.pm | 146 ++--------- lib/MogileFS/Factory.pm | 6 + lib/MogileFS/Factory/Class.pm | 4 +- lib/MogileFS/Factory/Device.pm | 4 +- lib/MogileFS/Factory/Domain.pm | 14 +- lib/MogileFS/Factory/Host.pm | 4 +- lib/MogileFS/Host.pm | 282 ++++----------------- lib/MogileFS/NewClass.pm | 59 ----- lib/MogileFS/NewDevice.pm | 310 ----------------------- lib/MogileFS/NewDomain.pm | 45 ---- lib/MogileFS/NewHost.pm | 110 -------- lib/MogileFS/Server.pm | 12 +- lib/MogileFS/Worker/Monitor.pm | 32 +-- lib/MogileFS/Worker/Query.pm | 8 +- lib/MogileFS/Worker/Replicate.pm | 1 - t/01-domain-class.t | 11 +- t/02-host-device.t | 6 +- t/domains-classes.t | 78 ------ t/hosts-devices.t | 50 ---- t/multiple-hosts-replpol.t | 18 +- t/store.t | 16 +- 24 files changed, 277 insertions(+), 1602 deletions(-) delete mode 100644 lib/MogileFS/NewClass.pm delete mode 100644 lib/MogileFS/NewDevice.pm delete mode 100644 lib/MogileFS/NewDomain.pm delete mode 100644 lib/MogileFS/NewHost.pm delete mode 100644 t/domains-classes.t delete mode 100644 t/hosts-devices.t diff --git a/lib/MogileFS/Class.pm b/lib/MogileFS/Class.pm index a35445ae..b1861b79 100644 --- a/lib/MogileFS/Class.pm +++ b/lib/MogileFS/Class.pm @@ -1,245 +1,59 @@ package MogileFS::Class; use strict; +use warnings; +use MogileFS::Util qw(throw); -my %singleton; # dmid -> classid -> MogileFS::Class -my $last_load = 0; +=head1 -# return MogileFS::Class object for a given fid id/obj -sub of_fid { - my ($pkg, $fid) = @_; - return undef unless $fid; - # make $fid into a FID object: - $fid = MogileFS::FID->new($fid) unless ref $fid; - return undef unless $fid->exists; - my $cl = $pkg->of_dmid_classid($fid->dmid, $fid->classid); - return $cl if $cl; - # return the default class for this file, not undef. this should - # always return a valid class for a valid FID. 
files need to - # always have a mindevcount (default of 2), repl policy, etc. - return $pkg->of_dmid_classid($fid->dmid, 0); -} - -# return MogileFS::Class, given a dmid and classid. or returns the -# default class, if classid is bogus. -sub of_dmid_classid { - my ($pkg, $dmid, $classid) = @_; - return $singleton{$dmid}{$classid} if - $singleton{$dmid} && - $singleton{$dmid}{$classid} && - $singleton{$dmid}{$classid}->{_loaded}; - $pkg->reload_classes; - return $singleton{$dmid}{$classid} if - $singleton{$dmid} && - $singleton{$dmid}{$classid}; - return undef; -} - -# marks everything dirty, triggering a reload, but doesn't actually -# reload now. will happen later, next time somebody loads something. -sub invalidate_cache { - my $pkg = shift; - $last_load = 0; - $pkg->_foreach_singleton(sub { - my $cl = shift; - $cl->{_loaded} = 0; - }); - if (my $worker = MogileFS::ProcManager->is_child) { - $worker->invalidate_meta("class"); - } -} - -sub check_cache { - my $pkg = shift; - my $now = time(); - return if $last_load > $now - 5; - MogileFS::Class->reload_classes; -} - -sub reload_classes { - my $pkg = shift; - my $now = time(); - - # mark everything as invalid for now - $pkg->_foreach_singleton(sub { - my ($cl, $dmid, $clid) = @_; - $cl->{_loaded} = 0; - }); +MogileFS::Class - Class class. 
- # install the default classes (classid=0) - my $default_min = MogileFS->config('default_mindevcount'); - foreach my $dom (MogileFS::Domain->domains) { - my $dmid = $dom->id; - my $cl = - ($singleton{$dmid}{0} = - bless { - dmid => $dmid, - classid => 0, - name => "default", - mindevcount => $default_min, - }, $pkg); - $cl->{_loaded} = 1; - } - - foreach my $row (Mgd::get_store()->get_all_classes) { - my $cl = - ($singleton{$row->{dmid}}{$row->{classid}} = - bless { - dmid => $row->{dmid}, - classid => $row->{classid}, - name => $row->{classname}, - mindevcount => $row->{mindevcount}, - replpolicy => $row->{replpolicy}, - }, $pkg); - $cl->{_loaded} = 1; - } - - # delete any singletons that weren't just loaded - $pkg->_foreach_singleton(sub { - my ($cl, $dmid, $clid) = @_; - return if $cl->{_loaded}; - delete $singleton{$dmid}{$clid}; - }); - - $last_load = $now; -} +=cut -# enumerates all loaded singletons (without reloading/checked caches), -# calling the given subref with (MogileFS::Class, $dmid, $classid) -sub _foreach_singleton { - my ($pkg, $cb) = @_; - foreach my $dmid (keys %singleton) { - foreach my $clid (keys %{$singleton{$dmid}}) { - $cb->($singleton{$dmid}{$clid}, $dmid, $clid); - } - } +sub new_from_args { + my ($class, $args, $domain_factory) = @_; + return bless { + domain_factory => $domain_factory, + mindevcount => 2, + %{$args}, + }, $class; } -# enumerates all classes, (reloading if needed), calling the given -# subref with (MogileFS::Class, $dmid, $classid) -sub foreach { - my ($pkg, $cb) = @_; - $pkg->check_cache; - $pkg->_foreach_singleton($cb); -} - -sub class_name { - my ($pkg, $dmid, $classid) = @_; - my $cls = $pkg->of_dmid_classid($dmid, $classid) - or return undef; - return $cls->name; -} - -sub class_id { - my ($pkg, $dmid, $classname) = @_; - return undef unless $dmid > 0 && length $classname; - # tries to get it first from cache, then reloads and tries again. 
- my $get = sub { - foreach my $cl ($pkg->classes_of_domain($dmid)) { - return $cl->classid if $cl->name eq $classname; - } - return undef; - }; - my $id = $get->(); - return $id if $id; - MogileFS::Class->reload_classes; - return $get->(); -} - -sub classes_of_domain { - my ($pkg, $doma) = @_; - my $dmid = ref $doma ? $doma->id : $doma; - $pkg->check_cache; - return () unless $dmid && $singleton{$dmid}; - return values %{ $singleton{$dmid} }; -} - -# throws 'dup' on duplicate name, returns class otherwise -sub create_class { - my ($pkg, $dom, $clname) = @_; - my $clid = Mgd::get_store()->create_class($dom->id, $clname); - return $pkg->of_dmid_classid($dom->id, $clid); -} - -# -------------------------------------------------------------------------- # Instance methods: -# -------------------------------------------------------------------------- -sub domainid { $_[0]{dmid} } -sub classid { $_[0]{classid} } -sub mindevcount { $_[0]{mindevcount} } +sub id { $_[0]{classid} } +sub name { $_[0]{classname} } +sub mindevcount { $_[0]{mindevcount} } +sub dmid { $_[0]{dmid} } sub repl_policy_string { my $self = shift; - # if they've actually configured one, it gets used: - return $self->{replpolicy} if $self->{replpolicy}; - # else, the historical default: - return "MultipleHosts()"; + return $self->{replpolicy} ? $self->{replpolicy} + : 'MultipleHosts()'; } sub repl_policy_obj { my $self = shift; - return $self->{_repl_policy_obj} if $self->{_repl_policy_obj}; - my $polstr = $self->repl_policy_string; - # parses it: - my $pol = MogileFS::ReplicationPolicy->new_from_policy_string($polstr); - return $self->{_repl_policy_obj} = $pol; + if (! $self->{_repl_policy_obj}) { + my $polstr = $self->repl_policy_string; + # Parses the string. 
+ $self->{_repl_policy_obj} = + MogileFS::ReplicationPolicy->new_from_policy_string($polstr); + } + return $self->{_repl_policy_obj}; } -sub name { $_[0]{name} } - sub domain { my $self = shift; - return MogileFS::Domain->of_dmid($self->domainid); + return $self->{domain_factory}->get_by_id($self->{dmid}); } -# throws 'dup' (for name conflict), returns 1 otherwise -sub set_name { - my ($self, $name) = @_; - return 1 if $self->name eq $name; - Mgd::get_store()->update_class_name(dmid => $self->domainid, - classid => $self->classid, - classname => $name); - $self->{name} = $name; - MogileFS::Class->invalidate_cache; - return 1; -} - -sub set_mindevcount { - my ($self, $n) = @_; - return 1 if $self->mindevcount == $n; - Mgd::get_store()->update_class_mindevcount(dmid => $self->domainid, - classid => $self->classid, - mindevcount => $n); - $self->{mindevcount} = $n; - MogileFS::Class->invalidate_cache; - return 1; -} - -sub set_replpolicy { - my ($self, $pol) = @_; - return 1 if $self->repl_policy_string eq $pol; - Mgd::get_store()->update_class_replpolicy(dmid => $self->domainid, - classid => $self->classid, - replpolicy => $pol); - $self->{replpolicy} = $pol; - MogileFS::Class->invalidate_cache; - return 1; - -} - -# throws: -# 'has_files' -sub delete { +sub has_files { my $self = shift; - throw("has_files") if $self->has_files; - Mgd::get_store()->delete_class($self->domainid, $self->classid); - MogileFS::Class->invalidate_cache; - return 1; + return Mgd::get_store()->class_has_files($self->{dmid}, $self->id); } -sub has_files { - my $self = shift; - return Mgd::get_store()->class_has_files($self->domainid, $self->classid); +sub observed_fields { + return {}; } 1; diff --git a/lib/MogileFS/DevFID.pm b/lib/MogileFS/DevFID.pm index e235e206..e480ac37 100644 --- a/lib/MogileFS/DevFID.pm +++ b/lib/MogileFS/DevFID.pm @@ -53,7 +53,7 @@ sub get_url { sub vivify_directories { my $self = shift; my $url = $self->url; - MogileFS::Device->vivify_directories($url); + 
$self->device()->vivify_directories($url); } # returns 0 on missing, diff --git a/lib/MogileFS/Device.pm b/lib/MogileFS/Device.pm index d0f82f74..09ad5551 100644 --- a/lib/MogileFS/Device.pm +++ b/lib/MogileFS/Device.pm @@ -1,273 +1,112 @@ package MogileFS::Device; use strict; use warnings; -use Carp qw(croak); -use MogileFS::Config qw(DEVICE_SUMMARY_CACHE_TIMEOUT); +use Carp qw/croak/; +use MogileFS::Util qw(throw); use MogileFS::Util qw(okay_args device_state error); +=head1 + +MogileFS::Device - device class + +=cut + BEGIN { my $testing = $ENV{TESTING} ? 1 : 0; eval "sub TESTING () { $testing }"; } -my %singleton; # devid -> instance -my $last_load = 0; # unixtime we last reloaded devices from database -my $all_loaded = 0; # bool: have we loaded all the devices? - -# throws "dup" on duplicate devid. returns new MogileFS::Device object on success. -# %args include devid, hostid, and status (in (alive, down, readonly)) -sub create { - my ($pkg, %args) = @_; - okay_args(\%args, qw(devid hostid status)); - my $devid = Mgd::get_store()->create_device(@args{qw(devid hostid status)}); - MogileFS::Device->invalidate_cache; - return $pkg->of_devid($devid); -} +my @observed_fields = qw/observed_state utilization/; +my @fields = (qw/hostid status weight mb_total mb_used mb_asof devid/, + @observed_fields); -sub of_devid { - my ($class, $devid) = @_; - croak("Invalid devid") unless $devid; - return $singleton{$devid} ||= bless { - devid => $devid, - no_mkcol => 0, - _loaded => 0, +sub new_from_args { + my ($class, $args, $host_factory) = @_; + my $self = bless { + host_factory => $host_factory, + %{$args}, }, $class; -} - -sub t_wipe_singletons { - %singleton = (); - $last_load = time(); # fake it -} - -sub t_init { - my ($self, $hostid, $state) = @_; - $self->{_loaded} = 1; - my $dstate = device_state($state) or - die "Bogus state"; + # FIXME: No guarantee (as of now?) that hosts get loaded before devs. 
+ #$self->host || die "No host for $self->{devid} (host $self->{hostid})"; - $self->{hostid} = $hostid; - $self->{status} = $state; - $self->{observed_state} = "writeable"; + croak "invalid device observed state '$self->{observed_state}', valid: writeable, readable, unreachable" + if $self->{observed_state} && $self->{observed_state} !~ /^(?:writeable|readable|unreachable)$/; - # say it's 10% full, of 1GB - $self->{mb_total} = 1000; - $self->{mb_used} = 100; + return $self; } -sub from_devid_and_hostname { - my ($class, $devid, $hostname) = @_; - my $dev = MogileFS::Device->of_devid($devid) - or return undef; - return undef unless $dev->exists; - my $host = $dev->host; - return undef - unless $host && $host->exists && $host->hostname eq $hostname; - return $dev; -} - -sub vivify_directories { - my ($class, $path) = @_; - - # $path is something like: - # http://10.0.0.26:7500/dev2/0/000/148/0000148056.fid - - # three directories we'll want to make: - # http://10.0.0.26:7500/dev2/0 - # http://10.0.0.26:7500/dev2/0/000 - # http://10.0.0.26:7500/dev2/0/000/148 - - croak "non-HTTP mode no longer supported" unless $path =~ /^http/; - return 0 unless $path =~ m!/dev(\d+)/(\d+)/(\d\d\d)/(\d\d\d)/\d+\.fid$!; - my ($devid, $p1, $p2, $p3) = ($1, $2, $3, $4); +# Instance methods - my $dev = MogileFS::Device->of_devid($devid); - return 0 unless $dev->exists; +sub id { return $_[0]{devid} } +sub devid { return $_[0]{devid} } +sub name { return $_[0]{devid} } +sub status { return $_[0]{status} } +sub weight { return $_[0]{weight} } +sub hostid { return $_[0]{hostid} } - $dev->create_directory("/dev$devid/$p1"); - $dev->create_directory("/dev$devid/$p1/$p2"); - $dev->create_directory("/dev$devid/$p1/$p2/$p3"); -} - -# returns array of all MogileFS::Device objects -sub devices { - my $class = shift; - MogileFS::Device->check_cache; - return values %singleton; -} - -# returns hashref of devid -> $device_obj -# you're allowed to mess with this returned hashref -sub map { - my $class 
= shift; - my $ret = {}; - foreach my $d (MogileFS::Device->devices) { - $ret->{$d->id} = $d; - } - return $ret; -} - -sub reload_devices { - my $class = shift; - - # mark them all invalid for now, until they're reloaded - foreach my $dev (values %singleton) { - $dev->{_loaded} = 0; - } - - MogileFS::Host->check_cache; - - my $sto = Mgd::get_store(); - foreach my $row ($sto->get_all_devices) { - my $dev = - MogileFS::Device->of_devid($row->{devid}); - $dev->absorb_dbrow($row); - } - - # get rid of ones that could've gone away: - foreach my $devid (keys %singleton) { - my $dev = $singleton{$devid}; - delete $singleton{$devid} unless $dev->{_loaded} - } - - $all_loaded = 1; - $last_load = time(); -} - -sub invalidate_cache { - my $class = shift; - - # so next time it's invalid and won't be used old - $last_load = 0; - $all_loaded = 0; - $_->{_loaded} = 0 foreach values %singleton; - - if (my $worker = MogileFS::ProcManager->is_child) { - $worker->invalidate_meta("device"); - } -} - -sub check_cache { - my $class = shift; - my $now = $Mgd::nowish || time(); - return if $last_load > $now - DEVICE_SUMMARY_CACHE_TIMEOUT; - MogileFS::Device->reload_devices; -} - -# -------------------------------------------------------------------------- - -sub devid { return $_[0]{devid} } -sub id { return $_[0]{devid} } - -sub absorb_dbrow { - my ($dev, $hashref) = @_; - foreach my $k (qw(hostid mb_total mb_used mb_asof status weight)) { - $dev->{$k} = $hashref->{$k}; - } - - $dev->{$_} ||= 0 foreach qw(mb_total mb_used mb_asof); - - my $host = MogileFS::Host->of_hostid($dev->{hostid}); - if ($host && $host->exists) { - my $host_status = $host->status; - die "No status" unless $host_status =~ /^\w+$/; - # FIXME: not sure I like this, changing the in-memory version - # of the configured status is. I'd rather this be calculated - # in an accessor. 
- if ($dev->{status} eq 'alive' && $host_status ne 'alive') { - $dev->{status} = "down" - } - } else { - if ($dev->{status} eq "dead") { - # ignore dead devices without hosts. not a big deal. - } else { - die "No host for dev $dev->{devid} (host $dev->{hostid})"; - } - } - - $dev->{_loaded} = 1; +sub host { + my $self = shift; + return $self->{host_factory}->get_by_id($self->{hostid}); } # returns 0 if not known, else [0,1] sub percent_free { - my $dev = shift; - $dev->_load; - return 0 unless $dev->{mb_total} && defined $dev->{mb_used}; - return 1 - ($dev->{mb_used} / $dev->{mb_total}); + my $self = shift; + return 0 unless $self->{mb_total} && defined $self->{mb_used}; + return 1 - ($self->{mb_used} / $self->{mb_total}); } # returns undef if not known, else [0,1] sub percent_full { - my $dev = shift; - $dev->_load; - return undef unless $dev->{mb_total} && defined $dev->{mb_used}; - return $dev->{mb_used} / $dev->{mb_total}; + my $self = shift; + return undef unless $self->{mb_total} && defined $self->{mb_used}; + return $self->{mb_used} / $self->{mb_total}; } -our $util_no_broadcast = 0; - -sub set_observed_utilization { - my ($dev, $util) = @_; - $dev->{utilization} = $util; - my $devid = $dev->id; - - return if $util_no_broadcast; +# FIXME: $self->mb_free? +sub fields { + my $self = shift; + my @tofetch = @_ ? @_ : @fields; + my $ret = { (map { $_ => $self->{$_} } @tofetch), + 'mb_free' => $self->mb_free }; + return $ret; +} - my $worker = MogileFS::ProcManager->is_child or return; - $worker->send_to_parent(":set_dev_utilization $devid $util"); +sub observed_fields { + return $_[0]->fields(@observed_fields); } sub observed_utilization { - my ($dev) = @_; + my $self = shift; if (TESTING) { - my $weight_varname = 'T_FAKE_IO_DEV' . $dev->id; + my $weight_varname = 'T_FAKE_IO_DEV' . 
$self->id; return $ENV{$weight_varname} if defined $ENV{$weight_varname}; } - return $dev->{utilization}; -} - -sub set_observed_state { - my ($dev, $state) = @_; - croak "set_observed_state() with invalid device state '$state', valid: writeable, readable, unreachable" - if $state !~ /^(?:writeable|readable|unreachable)$/; - $dev->{observed_state} = $state; + return $self->{utilization}; } sub observed_writeable { - my $dev = shift; - return 0 unless $dev->{observed_state} && $dev->{observed_state} eq "writeable"; - my $host = $dev->host - or return 0; + my $self = shift; + return 0 unless $self->{observed_state} && $self->{observed_state} eq 'writeable'; + my $host = $self->host or return 0; return 0 unless $host->observed_reachable; return 1; } sub observed_readable { - my $dev = shift; - return $dev->{observed_state} && $dev->{observed_state} eq "readable"; + my $self = shift; + return $self->{observed_state} && $self->{observed_state} eq 'readable'; } sub observed_unreachable { - my $dev = shift; - return $dev->{observed_state} && $dev->{observed_state} eq "unreachable"; -} - -# returns status as a string (SEE ALSO: dstate, returns DeviceState object, -# which knows the traits/capabilities of that named state) -sub status { - my $dev = shift; - $dev->_load; - return $dev->{status}; -} - -sub weight { - my $dev = shift; - $dev->_load; - return $dev->{weight}; + my $self = shift; + return $self->{observed_state} && $self->{observed_state} eq 'unreachable'; } +# FIXME: This pattern is weird. Store the object on new? 
sub dstate { my $ds = device_state($_[0]->status); return $ds if $ds; @@ -276,34 +115,34 @@ sub dstate { } sub can_delete_from { - my $self = shift; - return $self->dstate->can_delete_from; + return $_[0]->dstate->can_delete_from; } sub can_read_from { - my $self = shift; - return $self->dstate->can_read_from; + return $_[0]->dstate->can_read_from; } +# FIXME: Is there a (unrelated to this code) bug where new files aren't tested +# against the free space limit before being stored or replicated somewhere? sub should_get_new_files { - my $dev = shift; - my $dstate = $dev->dstate; + my $self = shift; + my $dstate = $self->dstate; return 0 unless $dstate->should_get_new_files; - return 0 unless $dev->observed_writeable; - return 0 unless $dev->host->should_get_new_files; - + return 0 unless $self->observed_writeable; + return 0 unless $self->host->should_get_new_files; # have enough disk space? (default: 100MB) my $min_free = MogileFS->config("min_free_space"); - return 0 if $dev->{mb_total} && - $dev->mb_free < $min_free; + return 0 if $self->{mb_total} && + $self->mb_free < $min_free; return 1; } sub mb_free { my $self = shift; - return $self->{mb_total} - $self->{mb_used}; + return $self->{mb_total} - $self->{mb_used} + if $self->{mb_total} && $self->{mb_used}; } sub mb_used { @@ -312,41 +151,22 @@ sub mb_used { # currently the same policy, but leaving it open for differences later. sub should_get_replicated_files { - my $dev = shift; - return $dev->should_get_new_files; + return $_[0]->should_get_new_files; } sub not_on_hosts { - my ($dev, @hosts) = @_; - my @hostids = map { ref($_) ? $_->hostid : $_ } @hosts; - my $my_hostid = $dev->hostid; + my ($self, @hosts) = @_; + my @hostids = map { ref($_) ? $_->id : $_ } @hosts; + my $my_hostid = $self->id; return (grep { $my_hostid == $_ } @hostids) ? 
0 : 1; } -sub exists { - my $dev = shift; - $dev->_try_load; - return $dev->{_loaded}; -} - -sub host { - my $dev = shift; - return MogileFS::Host->of_hostid($dev->hostid); -} - -sub hostid { - my $dev = shift; - $dev->_load; - return $dev->{hostid}; -} - +# "cached" by nature of the monitor worker testing this. sub doesnt_know_mkcol { - my $self = shift; - # TODO: forget this periodically? maybe whenever host/device is observed down? - # in case webserver changes. - return $self->{no_mkcol}; + return $_[0]->{no_mkcol}; } +# Gross class-based singleton cache. my %dir_made; # /dev/path -> $time my $dir_made_lastclean = 0; # returns 1 on success, 0 on failure @@ -356,7 +176,7 @@ sub create_directory { # rfc2518 says we "should" use a trailing slash. Some servers # (nginx) appears to require it. - $uri .= '/' unless $uri =~ m!/$!; + $uri .= '/' unless $uri =~ m/\/$/; return 1 if $dir_made{$uri}; @@ -377,8 +197,7 @@ sub create_directory { # if they don't support this method, remember that if ($ans && $ans =~ m!HTTP/1\.[01] (400|501)!) { $self->{no_mkcol} = 1; - # TODO: move this into method on device, which propagates to parent - # and also receive from parent. 
so all query workers share this knowledge + # TODO: move this into method in *monitor* worker return 1; } @@ -423,53 +242,19 @@ sub fid_chunks { } sub forget_about { - my ($dev, $fid) = @_; - Mgd::get_store()->remove_fidid_from_devid($fid->id, $dev->id); + my ($self, $fid) = @_; + Mgd::get_store()->remove_fidid_from_devid($fid->id, $self->id); return 1; } sub usage_url { - my $dev = shift; - my $host = $dev->host; + my $self = shift; + my $host = $self->host; my $get_port = $host->http_get_port; my $hostip = $host->ip; - return "http://$hostip:$get_port/dev$dev->{devid}/usage"; -} - -sub overview_hashref { - my $dev = shift; - $dev->_load; - - my $ret = {}; - foreach my $k (qw(devid hostid status weight observed_state - mb_total mb_used mb_asof utilization)) { - $ret->{$k} = $dev->{$k}; - } - $ret->{mb_free} = $dev->mb_free; - return $ret; + return "http://$hostip:$get_port/dev$self->{devid}/usage"; } -sub set_weight { - my ($dev, $weight) = @_; - my $sto = Mgd::get_store(); - $sto->set_device_weight($dev->id, $weight); - MogileFS::Device->invalidate_cache; -} - -sub set_state { - my ($dev, $state) = @_; - my $dstate = device_state($state) or - die "Bogus state"; - my $sto = Mgd::get_store(); - $sto->set_device_state($dev->id, $state); - MogileFS::Device->invalidate_cache; - - # wake a reaper process up from sleep to get started as soon as possible - # on re-replication - MogileFS::ProcManager->wake_a("reaper") if $dstate->should_wake_reaper; -} - -# given the current state, can this device transition into the provided $newstate? sub can_change_to_state { my ($self, $newstate) = @_; # don't allow dead -> alive transitions. 
(yes, still possible @@ -479,19 +264,31 @@ sub can_change_to_state { return 1; } -# -------------------------------------------------------------------------- +sub vivify_directories { + my ($self, $path) = @_; -sub _load { - return if $_[0]{_loaded}; - MogileFS::Device->reload_devices; - return if $_[0]{_loaded}; - my $dev = shift; - croak "Device $dev->{devid} doesn't exist.\n"; + # $path is something like: + # http://10.0.0.26:7500/dev2/0/000/148/0000148056.fid + + # three directories we'll want to make: + # http://10.0.0.26:7500/dev2/0 + # http://10.0.0.26:7500/dev2/0/000 + # http://10.0.0.26:7500/dev2/0/000/148 + + croak "non-HTTP mode no longer supported" unless $path =~ /^http/; + return 0 unless $path =~ m!/dev(\d+)/(\d+)/(\d\d\d)/(\d\d\d)/\d+\.fid$!; + my ($devid, $p1, $p2, $p3) = ($1, $2, $3, $4); + + die "devid mismatch" unless $self->id == $devid; + + $self->create_directory("/dev$devid/$p1"); + $self->create_directory("/dev$devid/$p1/$p2"); + $self->create_directory("/dev$devid/$p1/$p2/$p3"); } -sub _try_load { - return if $_[0]{_loaded}; - MogileFS::Device->reload_devices; +# FIXME: Remove this once vestigial code is removed. +sub set_observed_utilization { + return 1; } 1; diff --git a/lib/MogileFS/Domain.pm b/lib/MogileFS/Domain.pm index 2130d6ea..4c15a421 100644 --- a/lib/MogileFS/Domain.pm +++ b/lib/MogileFS/Domain.pm @@ -3,121 +3,24 @@ use strict; use warnings; use MogileFS::Util qw(throw); -# -------------------------------------------------------------------------- -# Class methods: -# -------------------------------------------------------------------------- +=head1 -my %singleton; # dmid -> MogileFS::Domain +MogileFS::Domain - domain class. 
-my %id2name; # dmid -> domainname(namespace) -my %name2id; # domainname(namespace) -> dmid +=cut -my $last_load = 0; - -# return singleton MogileFS::Domain, given a dmid -sub of_dmid { - my ($pkg, $dmid) = @_; - return undef unless $dmid; - return $singleton{$dmid} if $singleton{$dmid}; - - my $ns = $pkg->name_of_id($dmid) - or return undef; - - return $singleton{$dmid} = bless { - dmid => $dmid, - ns => $ns, - }, $pkg; -} - -# return singleton MogileFS::Domain, given a domain(namespace) -sub of_namespace { - my ($pkg, $ns) = @_; - return undef unless $ns; - my $dmid = $pkg->id_of_name($ns) - or return undef; - return MogileFS::Domain->of_dmid($dmid); -} - -# name to dmid, reloading if not in cache -sub id_of_name { - my ($pkg, $domain) = @_; - return $name2id{$domain} if $name2id{$domain}; - $pkg->reload_domains; - return $name2id{$domain}; -} - -# dmid to name, reloading if not in cache -sub name_of_id { - my ($pkg, $dmid) = @_; - return $id2name{$dmid} if $id2name{$dmid}; - $pkg->reload_domains; - return $id2name{$dmid}; -} - -# force reload of cache -sub reload_domains { - my $now = time(); - my $sto = Mgd::get_store(); - %name2id = $sto->get_all_domains; - %id2name = (); - while (my ($k, $v) = each %name2id) { - $id2name{$v} = $k; - } - - # Blow singleton cache on reload. Otherwise a *change* in data may not be - # reflected. 
- %singleton = (); - - $last_load = $now; +sub new_from_args { + my ($class, $args, $class_factory) = @_; + return bless { + class_factory => $class_factory, + %{$args}, + }, $class; } -# FIXME: should probably have an invalidate_cache variant that only -# flushes locally (for things like "get_domains" or "get_hosts", where -# it needs to be locally correct for the semantics of the command, but -# no need to propagate a cache invalidation to our peers) -sub invalidate_cache { - $last_load = 0; - %id2name = (); - %name2id = (); - if (my $worker = MogileFS::ProcManager->is_child) { - $worker->invalidate_meta("domain"); - } -} - -sub check_cache { - my $pkg = shift; - my $now = time(); - return if $last_load > $now - 5; - MogileFS::Domain->reload_domains; -} - -sub domains { - my $pkg = shift; - $pkg->check_cache; - return map { $pkg->of_dmid($_) } keys %id2name; -} - -# create a new domain given a name, returns MogileFS::Domain object on success. -# throws errors on failure. error codes include: -# "dup" -- on duplicate name -sub create { - my ($pkg, $name) = @_; - - # throws 'dup': - my $dmid = Mgd::get_store()->create_domain($name) - or die "create domain didn't return a dmid"; - - # return the domain id we created - MogileFS::Domain->invalidate_cache; - return MogileFS::Domain->of_dmid($dmid); -} - -# -------------------------------------------------------------------------- # Instance methods: -# -------------------------------------------------------------------------- -sub id { $_[0]->{dmid} } -sub name { $_[0]->{ns} } +sub id { $_[0]{dmid} } +sub name { $_[0]{namespace} } sub has_files { my $self = shift; @@ -126,34 +29,17 @@ sub has_files { } sub classes { - my $dom = shift; - # return a bunch of class objects for this domain - return MogileFS::Class->classes_of_domain($dom); -} - -# returns true if deleted. throws exceptions on errors. exception codes: -# 'has_files' if it has files. 
-sub delete { my $self = shift; - throw("has_files") if $self->has_files; - # TODO: delete its classes - my $rv = Mgd::get_store()->delete_domain($self->id); - MogileFS::Domain->invalidate_cache; - return $rv; + return $self->{class_factory}->get_all($self); } -# returns named class of domain sub class { - my ($dom, $clname) = @_; - foreach my $cl (MogileFS::Class->classes_of_domain($dom)) { - return $cl if $cl->name eq $clname; - } - return; + my $self = shift; + return $self->{class_factory}->get_by_name($self, $_[0]); } -sub create_class { - my ($dom, $clname) = @_; - return MogileFS::Class->create_class($dom, $clname); +sub observed_fields { + return {}; } 1; diff --git a/lib/MogileFS/Factory.pm b/lib/MogileFS/Factory.pm index 089ee4e1..bd7bd4c3 100644 --- a/lib/MogileFS/Factory.pm +++ b/lib/MogileFS/Factory.pm @@ -36,6 +36,12 @@ sub get_factory { return $singleton{$class}; } +# Allow unit tests to blow us up. +sub t_wipe { + my $class = shift; + delete $singleton{$class}; +} + # because 'add' means bail if already exists. sub set { my $self = shift; diff --git a/lib/MogileFS/Factory/Class.pm b/lib/MogileFS/Factory/Class.pm index 5044fcfd..b78ce496 100644 --- a/lib/MogileFS/Factory/Class.pm +++ b/lib/MogileFS/Factory/Class.pm @@ -3,7 +3,7 @@ use strict; use warnings; use base 'MogileFS::Factory'; -use MogileFS::NewClass; +use MogileFS::Class; # This class is a reimplementation since classids and classnames # are not globally unique... uses the same interface. 
@@ -12,7 +12,7 @@ sub set { my ($self, $args) = @_; my $domain_factory = MogileFS::Factory::Domain->get_factory; - my $class = MogileFS::NewClass->new_from_args($args, $domain_factory); + my $class = MogileFS::Class->new_from_args($args, $domain_factory); my $dmid = $class->dmid; $self->{by_id}->{$dmid}->{$class->id} = $class; $self->{by_name}->{$dmid}->{$class->name} = $class; diff --git a/lib/MogileFS/Factory/Device.pm b/lib/MogileFS/Factory/Device.pm index 91b3af9f..c9cd54b7 100644 --- a/lib/MogileFS/Factory/Device.pm +++ b/lib/MogileFS/Factory/Device.pm @@ -3,12 +3,12 @@ use strict; use warnings; use base 'MogileFS::Factory'; -use MogileFS::NewDevice; +use MogileFS::Device; sub set { my ($self, $args) = @_; my $hostfactory = MogileFS::Factory::Host->get_factory; - return $self->SUPER::set(MogileFS::NewDevice->new_from_args($args, $hostfactory)); + return $self->SUPER::set(MogileFS::Device->new_from_args($args, $hostfactory)); } 1; diff --git a/lib/MogileFS/Factory/Domain.pm b/lib/MogileFS/Factory/Domain.pm index 92fc60ef..36cee74d 100644 --- a/lib/MogileFS/Factory/Domain.pm +++ b/lib/MogileFS/Factory/Domain.pm @@ -3,12 +3,22 @@ use strict; use warnings; use base 'MogileFS::Factory'; -use MogileFS::NewDomain; +use MogileFS::Domain; sub set { my ($self, $args) = @_; my $classfactory = MogileFS::Factory::Class->get_factory; - return $self->SUPER::set(MogileFS::NewDomain->new_from_args($args, $classfactory)); + my $dom = $self->SUPER::set(MogileFS::Domain->new_from_args($args, $classfactory)); + + # Stupid awkward classes have a magic "default" + # If it exists in the DB, it will be overridden. 
+ my $cls = $classfactory->get_by_id($dom->id, 0); + unless ($cls) { + $classfactory->set({ dmid => $dom->id, classid => 0, + classname => 'default', + mindevcount => MogileFS->config('default_mindevcount')}); + } + return $dom; } 1; diff --git a/lib/MogileFS/Factory/Host.pm b/lib/MogileFS/Factory/Host.pm index 08b6554c..4c8c1e3c 100644 --- a/lib/MogileFS/Factory/Host.pm +++ b/lib/MogileFS/Factory/Host.pm @@ -3,12 +3,12 @@ use strict; use warnings; use base 'MogileFS::Factory'; -use MogileFS::NewHost; +use MogileFS::Host; sub set { my ($self, $args) = @_; my $devfactory = MogileFS::Factory::Device->get_factory; - return $self->SUPER::set(MogileFS::NewHost->new_from_args($args, $devfactory)); + return $self->SUPER::set(MogileFS::Host->new_from_args($args, $devfactory)); } 1; diff --git a/lib/MogileFS/Host.pm b/lib/MogileFS/Host.pm index c49c1e74..eb2b55dd 100644 --- a/lib/MogileFS/Host.pm +++ b/lib/MogileFS/Host.pm @@ -1,246 +1,87 @@ package MogileFS::Host; use strict; use warnings; +use MogileFS::Util qw(throw); use Net::Netmask; use Carp qw(croak); use MogileFS::Connection::Mogstored; -my %singleton; # hostid -> instance -my $last_load = 0; # unixtime of last 'reload_hosts' -my $all_loaded = 0; # bool: have we loaded all the hosts? 
+=head1 -# returns MogileFS::Host object, or throws 'dup' error -sub create { - my ($pkg, $hostname, $ip) = @_; - my $hid = Mgd::get_store()->create_host($hostname, $ip); - return MogileFS::Host->of_hostid($hid); -} - -sub t_wipe_singletons { - %singleton = (); -} - -sub of_hostid { - my ($class, $hostid) = @_; - return undef unless $hostid; - return $singleton{$hostid} ||= bless { - hostid => $hostid, - _loaded => 0, - }, $class; -} - -sub of_hostname { - my ($class, $hostname) = @_; +MogileFS::Host - host class - # reload if it's been awhile - MogileFS::Host->check_cache; - foreach my $host ($class->hosts) { - return $host if $host->{hostname} eq $hostname; - } - - # force a reload - MogileFS::Host->reload_hosts; - foreach my $host ($class->hosts) { - return $host if $host->{hostname} eq $hostname; - } +=cut - return undef; -} - -sub invalidate_cache { - my $class = shift; - - # so next time it's invalid and won't be used old - $last_load = 0; - $all_loaded = 0; - $_->{_loaded} = 0 foreach values %singleton; - - if (my $worker = MogileFS::ProcManager->is_child) { - $worker->invalidate_meta("host"); - } -} - -# force a reload of all host objects. -sub reload_hosts { - my $class = shift; - - # mark them all invalid for now, until they're reloaded - foreach my $host (values %singleton) { - $host->{_loaded} = 0; - } +# Centralized here instead of three places. 
+my @observed_fields = qw/observed_state/; +my @fields = (qw/hostid hostname hostip status http_port http_get_port altip altmask/, + @observed_fields); - my $sto = Mgd::get_store(); - foreach my $row ($sto->get_all_hosts) { - die unless $row->{status} =~ /^\w+$/; - my $ho = - MogileFS::Host->of_hostid($row->{hostid}); - $ho->absorb_dbrow($row); - } - - # get rid of ones that could've gone away: - foreach my $hostid (keys %singleton) { - my $host = $singleton{$hostid}; - delete $singleton{$hostid} unless $host->{_loaded} - } - - $all_loaded = 1; - $last_load = time(); -} - -# reload host objects if it hasn't been done in last 5 seconds -sub check_cache { - my $class = shift; - my $now = time(); - return if $last_load > $now - 5; - MogileFS::Host->reload_hosts; -} - -sub hosts { - my $class = shift; - $class->reload_hosts unless $all_loaded; - return values %singleton; -} - -# -------------------------------------------------------------------------- - -sub id { $_[0]{hostid} } -sub hostid { $_[0]{hostid} } - -sub absorb_dbrow { - my ($host, $hashref) = @_; - foreach my $k (qw(status hostname hostip http_port http_get_port altip altmask)) { - $host->{$k} = $hashref->{$k}; - } - $host->{mask} = - ($host->{altip} && $host->{altmask}) ? - Net::Netmask->new2($host->{altmask}) : - undef; - - $host->{_loaded} = 1; -} - -sub set_observed_state { - my ($host, $state) = @_; - croak "set_observed_state() with invalid host state '$state', valid: reachable, unreachable" - if $state !~ /^(?:reachable|unreachable)$/; - $host->{observed_state} = $state; -} +# TODO: Validate a few things: state, observed state. +sub new_from_args { + my ($class, $args, $dev_factory) = @_; + my $self = bless { + dev_factory => $dev_factory, + %{$args}, + }, $class; -sub observed_reachable { - my $host = shift; - return $host->{observed_state} && $host->{observed_state} eq "reachable"; -} + $self->{mask} = ($self->{altip} && $self->{altmask}) ? 
+ Net::Netmask->new2($self->{altmask}) : undef; -sub observed_unreachable { - my $host = shift; - return $host->{observed_state} && $host->{observed_state} eq "unreachable"; + return $self; } -sub set_status { shift->_set_field("status", @_); } -sub set_ip { shift->_set_field("hostip", @_); } # throws 'dup' -sub set_http_port { shift->_set_field("http_port", @_); } -sub set_http_get_port { shift->_set_field("http_get_port", @_); } -sub set_alt_ip { shift->_set_field("altip", @_); } -sub set_alt_mask { shift->_set_field("altmask", @_); } - -# for test suite. set fields in memory, without a MogileFS::Store -sub t_init { - my $self = shift; - my $status = shift; - # TODO: once we have a MogileFS::HostState, update this to - # validate it. not so important for now, though, since - # typos in tests will just make tests fail. - $self->{status} = $status; - $self->{_loaded} = 1; - $self->{observed_state} = "reachable"; +sub valid_state { + my ($class, $state) = @_; + return $state && $state =~ /^alive|dead|down$/; } -sub _set_field { - my ($self, $field, $val) = @_; - # $field is both the database column field and our member keys - $self->_load; - return 1 if $self->{$field} eq $val; - return 0 unless Mgd::get_store()->update_host_property($self->id, $field, $val); - $self->{$field} = $val; - MogileFS::Host->invalidate_cache; - return 1; -} +# Instance methods: -sub http_port { - my $host = shift; - $host->_load; - return $host->{http_port}; -} +sub id { $_[0]{hostid} } +sub name { $_[0]{hostname} } +sub hostname { $_[0]{hostname} } +sub hostip { $_[0]{hostip} } +sub status { $_[0]{status} } +sub http_port { $_[0]{http_port} } sub http_get_port { - my $host = shift; - $host->_load; - return $host->{http_get_port} || $host->{http_port}; + return $_[0]->{http_get_port} || $_[0]->{http_port}; } sub ip { - my $host = shift; - $host->_load; - if ($host->{mask} && $host->{altip} && + my $self = shift; + if ($self->{mask} && $self->{altip} && ($MogileFS::REQ_altzone || 
($MogileFS::REQ_client_ip && - $host->{mask}->match($MogileFS::REQ_client_ip)))) { - return $host->{altip}; + $self->{mask}->match($MogileFS::REQ_client_ip)))) { + return $self->{altip}; } else { - return $host->{hostip}; + return $self->{hostip}; } } -sub field { - my ($host, $k) = @_; - $host->_load; - # TODO: validate $k to be in certain set of allowed keys? - return $host->{$k}; -} - -sub status { - my $host = shift; - $host->_load; - return $host->{status}; +sub fields { + my $self = shift; + my @tofetch = @_ ? @_ : @fields; + return { map { $_ => $self->{$_} } @tofetch }; } -sub hostname { - my $host = shift; - $host->_load; - return $host->{hostname}; +sub observed_fields { + return $_[0]->fields(@observed_fields); } sub should_get_new_files { - my $host = shift; - return $host->status eq "alive"; + return $_[0]->status eq 'alive'; } -sub is_marked_down { - my $host = shift; - die "FIXME"; - # ... -} - -sub exists { - my $host = shift; - $host->_try_load; - return $host->{_loaded}; -} - -sub overview_hashref { - my $host = shift; - $host->_load; - my $ret = {}; - foreach my $k (qw(hostid status http_port http_get_port hostname hostip altip altmask)) { - $ret->{$k} = $host->{$k}; - } - return $ret; +sub observed_reachable { + my $self = shift; + return $self->{observed_state} && $self->{observed_state} eq 'reachable'; } -sub delete { - my $host = shift; - my $rv = Mgd::get_store()->delete_host($host->id); - MogileFS::Host->invalidate_cache; - return $rv; +sub observed_unreachable { + my $self = shift; + return $self->{observed_state} && $self->{observed_state} eq 'unreachable'; } # returns/creates a MogileFS::Connection::Mogstored object to the @@ -259,33 +100,4 @@ sub sidechannel_port { MogileFS->config("mogstored_stream_port"); } -# class method -sub valid_state { - my ($class, $state) = @_; - return $state && $state =~ /^alive|dead|down$/; -} - -# class method. valid host state, for newly created hosts? -# currently equal to valid_state. 
-sub valid_initial_state { - my ($class, $state) = @_; - return $class->valid_state($state); -} - -# -------------------------------------------------------------------------- - -sub _load { - return if $_[0]{_loaded}; - MogileFS::Host->reload_hosts; - return if $_[0]{_loaded}; - my $host = shift; - croak "Host $host->{hostid} doesn't exist.\n"; -} - -sub _try_load { - return if $_[0]{_loaded}; - MogileFS::Host->reload_hosts; -} - - 1; diff --git a/lib/MogileFS/NewClass.pm b/lib/MogileFS/NewClass.pm deleted file mode 100644 index ea567a51..00000000 --- a/lib/MogileFS/NewClass.pm +++ /dev/null @@ -1,59 +0,0 @@ -package MogileFS::NewClass; -use strict; -use warnings; -use MogileFS::Util qw(throw); - -=head1 - -MogileFS::NewClass - Class class. - -=cut - -sub new_from_args { - my ($class, $args, $domain_factory) = @_; - return bless { - domain_factory => $domain_factory, - mindevcount => 2, - %{$args}, - }, $class; -} - -# Instance methods: - -sub id { $_[0]{classid} } -sub name { $_[0]{classname} } -sub mindevcount { $_[0]{mindevcount} } -sub dmid { $_[0]{dmid} } - -sub repl_policy_string { - my $self = shift; - return $self->{replpolicy} ? $self->{replpolicy} - : 'MultipleHosts()'; -} - -sub repl_policy_obj { - my $self = shift; - if (! $self->{_repl_policy_obj}) { - my $polstr = $self->repl_policy_string; - # Parses the string. 
- $self->{_repl_policy_obj} = - MogileFS::ReplicationPolicy->new_from_policy_string($polstr); - } - return $self->{_repl_policy_obj}; -} - -sub domain { - my $self = shift; - return $self->{domain_factory}->get_by_id($self->{dmid}); -} - -sub has_files { - my $self = shift; - return Mgd::get_store()->class_has_files($self->{dmid}, $self->id); -} - -sub observed_fields { - return {}; -} - -1; diff --git a/lib/MogileFS/NewDevice.pm b/lib/MogileFS/NewDevice.pm deleted file mode 100644 index 5ae6dec7..00000000 --- a/lib/MogileFS/NewDevice.pm +++ /dev/null @@ -1,310 +0,0 @@ -package MogileFS::NewDevice; -use strict; -use warnings; -use Carp qw/croak/; -use MogileFS::Util qw(throw); -use MogileFS::Util qw(okay_args device_state error); - -=head1 - -MogileFS::NewDevice - device class - -=cut - -BEGIN { - my $testing = $ENV{TESTING} ? 1 : 0; - eval "sub TESTING () { $testing }"; -} - -my @observed_fields = qw/observed_state utilization/; -my @fields = (qw/hostid status weight mb_total mb_used mb_asof devid/, - @observed_fields); - -sub new_from_args { - my ($class, $args, $host_factory) = @_; - my $self = bless { - host_factory => $host_factory, - %{$args}, - }, $class; - - # FIXME: No guarantee (as of now?) that hosts get loaded before devs. - #$self->host || die "No host for $self->{devid} (host $self->{hostid})"; - - croak "invalid device observed state '$self->{observed_state}', valid: writeable, readable, unreachable" - if $self->{observed_state} && $self->{observed_state} !~ /^(?:writeable|readable|unreachable)$/; - - return $self; -} - -# Instance methods - -sub id { return $_[0]{devid} } -sub devid { return $_[0]{devid} } -sub name { return $_[0]{devid} } -sub status { return $_[0]{status} } -sub weight { return $_[0]{weight} } -sub hostid { return $_[0]{hostid} } - -# FIXME: This shouldn't be necessary anymore? 
-sub t_init { - my ($self, $hostid, $state) = @_; - - my $dstate = device_state($state) or - die "Bogus state"; - - $self->{hostid} = $hostid; - $self->{status} = $state; - $self->{observed_state} = "writeable"; - - # say it's 10% full, of 1GB - $self->{mb_total} = 1000; - $self->{mb_used} = 100; -} - -sub host { - my $self = shift; - return $self->{host_factory}->get_by_id($self->{hostid}); -} - -# returns 0 if not known, else [0,1] -sub percent_free { - my $self = shift; - return 0 unless $self->{mb_total} && defined $self->{mb_used}; - return 1 - ($self->{mb_used} / $self->{mb_total}); -} - -# returns undef if not known, else [0,1] -sub percent_full { - my $self = shift; - return undef unless $self->{mb_total} && defined $self->{mb_used}; - return $self->{mb_used} / $self->{mb_total}; -} - -# FIXME: $self->mb_free? -sub fields { - my $self = shift; - my @tofetch = @_ ? @_ : @fields; - my $ret = { (map { $_ => $self->{$_} } @tofetch), - 'mb_free' => $self->mb_free }; - return $ret; -} - -sub observed_fields { - return $_[0]->fields(@observed_fields); -} - -sub observed_utilization { - my $self = shift; - - if (TESTING) { - my $weight_varname = 'T_FAKE_IO_DEV' . $self->id; - return $ENV{$weight_varname} if defined $ENV{$weight_varname}; - } - - return $self->{utilization}; -} - -sub observed_writeable { - my $self = shift; - return 0 unless $self->{observed_state} && $self->{observed_state} eq 'writeable'; - my $host = $self->host or return 0; - return 0 unless $host->observed_reachable; - return 1; -} - -sub observed_readable { - my $self = shift; - return $self->{observed_state} && $self->{observed_state} eq 'readable'; -} - -sub observed_unreachable { - my $self = shift; - return $self->{observed_state} && $self->{observed_state} eq 'unreachable'; -} - -# FIXME: This pattern is weird. Store the object on new? 
-sub dstate { - my $ds = device_state($_[0]->status); - return $ds if $ds; - error("dev$_[0]->{devid} has bogus status '$_[0]->{status}', pretending 'down'"); - return device_state("down"); -} - -sub can_delete_from { - return $_[0]->dstate->can_delete_from; -} - -sub can_read_from { - return $_[0]->dstate->can_read_from; -} - -# FIXME: Is there a (unrelated to this code) bug where new files aren't tested -# against the free space limit before being stored or replicated somewhere? -sub should_get_new_files { - my $self = shift; - my $dstate = $self->dstate; - - return 0 unless $dstate->should_get_new_files; - return 0 unless $self->observed_writeable; - return 0 unless $self->host->should_get_new_files; - # have enough disk space? (default: 100MB) - my $min_free = MogileFS->config("min_free_space"); - return 0 if $self->{mb_total} && - $self->mb_free < $min_free; - - return 1; -} - -sub mb_free { - my $self = shift; - return $self->{mb_total} - $self->{mb_used} - if $self->{mb_total} && $self->{mb_used}; -} - -sub mb_used { - return $_[0]->{mb_used}; -} - -# currently the same policy, but leaving it open for differences later. -sub should_get_replicated_files { - return $_[0]->should_get_new_files; -} - -sub not_on_hosts { - my ($self, @hosts) = @_; - my @hostids = map { ref($_) ? $_->id : $_ } @hosts; - my $my_hostid = $self->id; - return (grep { $my_hostid == $_ } @hostids) ? 0 : 1; -} - -# "cached" by nature of the monitor worker testing this. -sub doesnt_know_mkcol { - return $_[0]->{no_mkcol}; -} - -# Gross class-based singleton cache. -my %dir_made; # /dev/path -> $time -my $dir_made_lastclean = 0; -# returns 1 on success, 0 on failure -sub create_directory { - my ($self, $uri) = @_; - return 1 if $self->doesnt_know_mkcol; - - # rfc2518 says we "should" use a trailing slash. Some servers - # (nginx) appears to require it. 
- $uri .= '/' unless $uri =~ m/\/$/; - - return 1 if $dir_made{$uri}; - - my $hostid = $self->hostid; - my $host = $self->host; - my $hostip = $host->ip or return 0; - my $port = $host->http_port or return 0; - my $peer = "$hostip:$port"; - - my $sock = IO::Socket::INET->new(PeerAddr => $peer, Timeout => 1) - or return 0; - - print $sock "MKCOL $uri HTTP/1.0\r\n". - "Content-Length: 0\r\n\r\n"; - - my $ans = <$sock>; - - # if they don't support this method, remember that - if ($ans && $ans =~ m!HTTP/1\.[01] (400|501)!) { - $self->{no_mkcol} = 1; - # TODO: move this into method in *monitor* worker - return 1; - } - - return 0 unless $ans && $ans =~ m!^HTTP/1.[01] 2\d\d!; - - my $now = time(); - $dir_made{$uri} = $now; - - # cleanup %dir_made occasionally. - my $clean_interval = 300; # every 5 minutes. - if ($dir_made_lastclean < $now - $clean_interval) { - $dir_made_lastclean = $now; - foreach my $k (keys %dir_made) { - delete $dir_made{$k} if $dir_made{$k} < $now - 3600; - } - } - return 1; -} - -sub fid_list { - my ($self, %opts) = @_; - my $limit = delete $opts{limit}; - croak("No limit specified") unless $limit && $limit =~ /^\d+$/; - croak("Unknown options to fid_list") if %opts; - - my $sto = Mgd::get_store(); - my $fidids = $sto->get_fidids_by_device($self->devid, $limit); - return map { - MogileFS::FID->new($_) - } @{$fidids || []}; -} - -sub fid_chunks { - my ($self, %opts) = @_; - - my $sto = Mgd::get_store(); - # storage function does validation. 
- my $fidids = $sto->get_fidid_chunks_by_device(devid => $self->devid, %opts); - return map { - MogileFS::FID->new($_) - } @{$fidids || []}; -} - -sub forget_about { - my ($self, $fid) = @_; - Mgd::get_store()->remove_fidid_from_devid($fid->id, $self->id); - return 1; -} - -sub usage_url { - my $self = shift; - my $host = $self->host; - my $get_port = $host->http_get_port; - my $hostip = $host->ip; - return "http://$hostip:$get_port/dev$self->{devid}/usage"; -} - -sub can_change_to_state { - my ($self, $newstate) = @_; - # don't allow dead -> alive transitions. (yes, still possible - # to go dead -> readonly -> alive to bypass this, but this is - # all more of a user-education thing than an absolute policy) - return 0 if $self->dstate->is_perm_dead && $newstate eq 'alive'; - return 1; -} - -sub vivify_directories { - my ($self, $path) = @_; - - # $path is something like: - # http://10.0.0.26:7500/dev2/0/000/148/0000148056.fid - - # three directories we'll want to make: - # http://10.0.0.26:7500/dev2/0 - # http://10.0.0.26:7500/dev2/0/000 - # http://10.0.0.26:7500/dev2/0/000/148 - - croak "non-HTTP mode no longer supported" unless $path =~ /^http/; - return 0 unless $path =~ m!/dev(\d+)/(\d+)/(\d\d\d)/(\d\d\d)/\d+\.fid$!; - my ($devid, $p1, $p2, $p3) = ($1, $2, $3, $4); - - die "devid mismatch" unless $self->id == $devid; - - $self->create_directory("/dev$devid/$p1"); - $self->create_directory("/dev$devid/$p1/$p2"); - $self->create_directory("/dev$devid/$p1/$p2/$p3"); -} - -# FIXME: Remove this once vestigial code is removed. -sub set_observed_utilization { - return 1; -} - -1; diff --git a/lib/MogileFS/NewDomain.pm b/lib/MogileFS/NewDomain.pm deleted file mode 100644 index 06902a60..00000000 --- a/lib/MogileFS/NewDomain.pm +++ /dev/null @@ -1,45 +0,0 @@ -package MogileFS::NewDomain; -use strict; -use warnings; -use MogileFS::Util qw(throw); - -=head1 - -MogileFS::NewDomain - domain class. 
- -=cut - -sub new_from_args { - my ($class, $args, $class_factory) = @_; - return bless { - class_factory => $class_factory, - %{$args}, - }, $class; -} - -# Instance methods: - -sub id { $_[0]{dmid} } -sub name { $_[0]{namespace} } - -sub has_files { - my $self = shift; - return 1 if $Mgd::_T_DOM_HAS_FILES; - return Mgd::get_store()->domain_has_files($self->id); -} - -sub classes { - my $self = shift; - return $self->{class_factory}->get_all($self); -} - -sub class { - my $self = shift; - return $self->{class_factory}->get_by_name($self, $_[0]); -} - -sub observed_fields { - return {}; -} - -1; diff --git a/lib/MogileFS/NewHost.pm b/lib/MogileFS/NewHost.pm deleted file mode 100644 index 71970c27..00000000 --- a/lib/MogileFS/NewHost.pm +++ /dev/null @@ -1,110 +0,0 @@ -package MogileFS::NewHost; -use strict; -use warnings; -use MogileFS::Util qw(throw); -use Net::Netmask; -use Carp qw(croak); -use MogileFS::Connection::Mogstored; - -=head1 - -MogileFS::NewHost - host class - -=cut - -# Centralized here instead of three places. -my @observed_fields = qw/observed_state/; -my @fields = (qw/hostid hostname hostip status http_port http_get_port altip altmask/, - @observed_fields); - -# TODO: Validate a few things: state, observed state. -sub new_from_args { - my ($class, $args, $dev_factory) = @_; - my $self = bless { - dev_factory => $dev_factory, - %{$args}, - }, $class; - - $self->{mask} = ($self->{altip} && $self->{altmask}) ? 
- Net::Netmask->new2($self->{altmask}) : undef; - - return $self; -} - -sub valid_state { - my ($class, $state) = @_; - return $state && $state =~ /^alive|dead|down$/; -} - -# Instance methods: - -sub id { $_[0]{hostid} } -sub name { $_[0]{hostname} } -sub hostname { $_[0]{hostname} } -sub hostip { $_[0]{hostip} } -sub status { $_[0]{status} } -sub http_port { $_[0]{http_port} } - -sub http_get_port { - return $_[0]->{http_get_port} || $_[0]->{http_port}; -} - -sub ip { - my $self = shift; - if ($self->{mask} && $self->{altip} && - ($MogileFS::REQ_altzone || ($MogileFS::REQ_client_ip && - $self->{mask}->match($MogileFS::REQ_client_ip)))) { - return $self->{altip}; - } else { - return $self->{hostip}; - } -} - -sub fields { - my $self = shift; - my @tofetch = @_ ? @_ : @fields; - return { map { $_ => $self->{$_} } @tofetch }; -} - -sub observed_fields { - return $_[0]->fields(@observed_fields); -} - -sub should_get_new_files { - return $_[0]->status eq 'alive'; -} - -sub t_init { - my $self = shift; - my $status = shift; - $self->{status} = $status; - $self->{observed_state} = "reachable"; -} - -sub observed_reachable { - my $self = shift; - return $self->{observed_state} && $self->{observed_state} eq 'reachable'; -} - -sub observed_unreachable { - my $self = shift; - return $self->{observed_state} && $self->{observed_state} eq 'unreachable'; -} - -# returns/creates a MogileFS::Connection::Mogstored object to the -# host's mogstored management/side-channel port (which starts -# unconnected, and only connects when you ask it to, with its sock -# method) -sub mogstored_conn { - my $self = shift; - return $self->{mogstored_conn} ||= - MogileFS::Connection::Mogstored->new($self->ip, $self->sidechannel_port); -} - -sub sidechannel_port { - # TODO: let this be configurable per-host? currently it's configured - # once for all machines. 
- MogileFS->config("mogstored_stream_port"); -} - -1; diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index d7ef8d5c..33ffeec8 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -68,17 +68,13 @@ use MogileFS::Factory::Domain; use MogileFS::Factory::Class; use MogileFS::Factory::Host; use MogileFS::Factory::Device; -use MogileFS::NewDomain; -use MogileFS::NewClass; -use MogileFS::NewHost; -use MogileFS::NewDevice; - -use MogileFS::HTTPFile; +use MogileFS::Domain; use MogileFS::Class; -use MogileFS::Device; use MogileFS::Host; +use MogileFS::Device; + +use MogileFS::HTTPFile; use MogileFS::FID; -use MogileFS::Domain; use MogileFS::DevFID; use MogileFS::Store; diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index a3843c0c..30b7d2d8 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -69,15 +69,11 @@ sub work { debug("Monitor running; checking DB for updates"); $self->validate_dbh; - my $new_data = {}; - my $prev_data = $self->{prev_data}; my $db_data = $self->grab_all_data; - # Stack this up to ship back later. - my @events = (); - $self->diff_data($db_data, $prev_data, $new_data, \@events); + # Stack diffs to ship back later + $self->diff_data($db_data); - $self->{prev_data} = $new_data; $self->send_events_to_parent; $self->send_to_parent(":monitor_just_ran"); Danga::Socket->AddTimer(4, $db_monitor); @@ -98,7 +94,6 @@ sub work { my $dev_factory = MogileFS::Factory::Device->get_factory(); my $cur_iow = {}; - my @events = (); # Run check_devices to test host/devs. diff against old values. 
for my $dev ($dev_factory->get_all) { if (my $state = $self->is_iow_diff($dev)) { @@ -106,7 +101,7 @@ sub work { } $cur_iow->{$dev->id} = $self->{devutil}->{cur}->{$dev->id}; next if $self->{skip_host}{$dev->hostid}; - $self->check_device($dev, \@events); + $self->check_device($dev); } $self->{devutil}->{prev} = $cur_iow; @@ -141,7 +136,8 @@ sub send_events_to_parent { } return unless @flat; $self->{events} = []; - print STDERR "SENDING STATE CHANGES ", join(' ', ':monitor_events', @flat), "\n"; + # TODO: Maybe wasting too much CPU building this debug line every time... + debug("sending state changes " . join(' ', ':monitor_events', @flat), 2); $self->send_to_parent(join(' ', ':monitor_events', @flat)); } @@ -172,8 +168,10 @@ sub is_iow_diff { } sub diff_data { - my ($self, $db_data, $prev_data, $new_data, $ev) = @_; + my ($self, $db_data) = @_; + my $new_data = {}; + my $prev_data = $self->{prev_data}; for my $type (keys %{$db_data}) { my $d_data = $db_data->{$type}; my $p_data = $prev_data->{$type}; @@ -199,6 +197,7 @@ sub diff_data { $new_data->{$type} = $n_data; } + $self->{prev_data} = $new_data; } # returns 1 if the hashes are different. @@ -208,11 +207,12 @@ sub diff_hash { my %keys = (); map { $keys{$_}++ } keys %$old, keys %$new; for my $k (keys %keys) { - return 1 unless ((exists $old->{$k} && - exists $new->{$k}) && - ( (! defined $old->{$k} && ! defined $new->{$k}) || - ($old->{$k} eq $new->{$k}) ) - ); + return 1 if (exists $old->{$k} && ! exists $new->{$k}); + return 1 if (exists $new->{$k} && ! exists $old->{$k}); + return 1 if (defined $old->{$k} && ! defined $new->{$k}); + return 1 if (defined $new->{$k} && ! defined $old->{$k}); + return 0 if (! defined $new->{$k} && ! 
defined $old->{$k}); + return 1 if ($old->{$k} ne $new->{$k}); } return 0; } @@ -244,7 +244,7 @@ sub ua { } sub check_device { - my ($self, $dev, $ev) = @_; + my ($self, $dev) = @_; my $devid = $dev->id; my $host = $dev->host; diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index c4c5fe05..79ec23dc 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -529,8 +529,8 @@ sub cmd_file_debug { } if ($fid) { - $fid->{domain} = Mgd::domain_factory()->get_by_id->($fid->{dmid})->name; - $fid->{class} = Mgd::class_factory()->get_by_id->($fid->{dmid}, + $fid->{domain} = Mgd::domain_factory()->get_by_id($fid->{dmid})->name; + $fid->{class} = Mgd::class_factory()->get_by_id($fid->{dmid}, $fid->{classid})->name; } @@ -585,8 +585,8 @@ sub cmd_file_info { my $ret = {}; $ret->{fid} = $fid->id; - $ret->{domain} = Mgd::domain_factory->get_by_id($fid->dmid)->name; - $ret->{class} = Mgd::class_factory->get_by_id($fid->dmid, + $ret->{domain} = Mgd::domain_factory()->get_by_id($fid->dmid)->name; + $ret->{class} = Mgd::class_factory()->get_by_id($fid->dmid, $fid->classid)->name; $ret->{key} = $key; $ret->{'length'} = $fid->length; diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index 855867f9..bb55f568 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -10,7 +10,6 @@ use fields ( use List::Util (); use MogileFS::Util qw(error every debug); use MogileFS::Config; -use MogileFS::Class; use MogileFS::ReplicationRequest qw(rr_upgrade); # setup the value used in a 'nexttry' field to indicate that this item will never diff --git a/t/01-domain-class.t b/t/01-domain-class.t index e3133d63..51e5088a 100644 --- a/t/01-domain-class.t +++ b/t/01-domain-class.t @@ -11,8 +11,8 @@ use MogileFS::Test; use MogileFS::Factory; use MogileFS::Factory::Domain; use MogileFS::Factory::Class; -use MogileFS::NewDomain; -use MogileFS::NewClass; +use MogileFS::Domain; +use MogileFS::Class; use 
Data::Dumper qw/Dumper/; @@ -46,7 +46,7 @@ ok($domfac != $classfac, "factories are not the same singleton"); ok($cls, "got a class object"); is($cls->id, 1, "class id is 1"); is($cls->name, 'fried', 'class name is fried'); - is(ref($cls->domain), 'MogileFS::NewDomain', + is(ref($cls->domain), 'MogileFS::Domain', 'class can find a domain object'); } @@ -76,7 +76,7 @@ ok($domfac != $classfac, "factories are not the same singleton"); my @doms = $domfac->get_all; is(scalar(@doms), 2, 'got two domains back from get_all'); for (@doms) { - is(ref($_), 'MogileFS::NewDomain', 'and both are domains'); + is(ref($_), 'MogileFS::Domain', 'and both are domains'); } isnt($doms[0]->id, $doms[1]->id, 'and both are not the same'); } @@ -97,7 +97,8 @@ ok($domfac != $classfac, "factories are not the same singleton"); { my $dom = $domfac->get_by_name('harro'); my @classes = $dom->classes; - is(scalar(@classes), 2, 'found two classes'); + # Magic "default" class is included + is(scalar(@classes), 3, 'found three classes'); ok($dom->class('blue'), 'found the blue class'); ok(!$dom->class('fried'), 'did not find the fried class'); diff --git a/t/02-host-device.t b/t/02-host-device.t index bc2f1577..7a7ab9f2 100644 --- a/t/02-host-device.t +++ b/t/02-host-device.t @@ -11,8 +11,8 @@ use MogileFS::Test; use MogileFS::Factory; use MogileFS::Factory::Host; use MogileFS::Factory::Device; -use MogileFS::NewHost; -use MogileFS::NewDevice; +use MogileFS::Host; +use MogileFS::Device; use Data::Dumper qw/Dumper/; @@ -79,6 +79,8 @@ observed_state => 'writeable'}); 'updated dev1 DB entry'); ok($sto->update_device(2, { mb_total => 100, mb_used => 3, status => 'dead' }), 'updated dev2 DB entry'); + + # Test duplication errors. 
} { diff --git a/t/domains-classes.t b/t/domains-classes.t deleted file mode 100644 index dda580b4..00000000 --- a/t/domains-classes.t +++ /dev/null @@ -1,78 +0,0 @@ -# -*-perl-*- - -use strict; -use warnings; -use Test::More; -use FindBin qw($Bin); - -use MogileFS::Server; -use MogileFS::Util qw(error_code); -use MogileFS::Test; - -my $sto = eval { temp_store(); }; -if ($sto) { - plan tests => 26; -} else { - plan skip_all => "Can't create temporary test database: $@"; - exit 0; -} - -is(scalar MogileFS::Domain->domains, 0, "no domains at present"); - -my $dom = MogileFS::Domain->create("foo"); -ok($dom, "created a domain"); - -my $dup = eval { MogileFS::Domain->create("foo") }; -ok(!$dup, "didn't create it"); -is(error_code($@), "dup", "because it was a duplicate domain"); - -is(scalar MogileFS::Domain->domains, 1, "one domain now"); -$dom->delete; -is(scalar MogileFS::Domain->domains, 0, "back to zero domains"); - -$dom = MogileFS::Domain->create("foo"); -ok($dom, "created foo domain again"); -is(scalar MogileFS::Domain->domains, 1, "back to one domain"); - -{ - local $Mgd::_T_DOM_HAS_FILES = 1; - ok(!eval{ $dom->delete; }, "failed to delete domain"); - is(error_code($@), "has_files", "because it had files"); -} - -my @classes = $dom->classes; -is(scalar @classes, 1, "one class in domain") - or die; -is($classes[0]->name, "default", "is the default class"); -is($classes[0]->classid, 0, ".. of classid 0"); -ok(defined $classes[0]->classid, ".. 
which is defined"); - -my $cla = $dom->create_class("classA"); -ok($cla, "created classA"); -is(scalar($dom->classes), 2, "two classes now"); - -my $clb = $dom->create_class("classB"); -ok($clb, "created classB"); -is(scalar($dom->classes), 3, "three classes now"); - -{ - my $dup = eval { $dom->create_class("classA") }; # can't create this again - ok(!$dup, "didn't create dup of A"); - is(error_code($@), "dup", "because it was a dup"); -} - -ok($clb->set_name("classB2"), "renamed classB to B2"); -is($clb->name, "classB2", "and it renamed"); - -ok(!eval { $clb->set_name("classA") }, "failed to rename B2 to classA"); -is(error_code($@), "dup", "because it was a dup"); - -ok($clb->delete, "deleted class"); -is(scalar($dom->classes), 2, "two classes now"); - - - - - - - diff --git a/t/hosts-devices.t b/t/hosts-devices.t deleted file mode 100644 index 5066985d..00000000 --- a/t/hosts-devices.t +++ /dev/null @@ -1,50 +0,0 @@ -# -*-perl-*- - -use strict; -use warnings; -use Test::More; -use FindBin qw($Bin); - -use MogileFS::Server; -use MogileFS::Util qw(error_code); -use MogileFS::Test; - -my $sto = eval { temp_store(); }; -if ($sto) { - plan tests => 18; -} else { - plan skip_all => "Can't create temporary test database: $@"; - exit 0; -} - -is(scalar MogileFS::Host->hosts, 0, "no hosts at present"); -is(scalar MogileFS::Device->devices, 0, "no devices at present"); - -my $ha = MogileFS::Host->create("a", "10.0.0.1"); -ok($ha, "made hostA"); -my $hb = MogileFS::Host->create("b", "10.0.0.2"); -ok($hb, "made hostB"); -ok(!eval{ MogileFS::Host->create("b", "10.0.0.3") }, "can't dup hostB's name"); -is(error_code($@), "dup", "yup, was a dup"); -ok(!eval{ MogileFS::Host->create("c", "10.0.0.2") }, "can't dup hostB's IP"); -is(error_code($@), "dup", "yup, was a dup"); - -ok($hb->set_ip("10.0.0.4"), "set IP"); -is($hb->ip, "10.0.0.4", "IP matches"); -ok(!eval{$hb->set_ip("10.0.0.1")}, "IP's taken"); -is(error_code($@), "dup", "yup, was a dup"); - -is(scalar 
MogileFS::Host->hosts, 2, "2 hosts now"); -ok($ha->delete, "deleted hostA"); -is(scalar MogileFS::Host->hosts, 1, "1 host now"); - -my $da = MogileFS::Device->create(devid => 1, - hostid => $hb->id, - status => "alive"); -ok($da, "made dev1"); -ok($da->not_on_hosts($ha), "dev1 not on ha"); -ok(!$da->not_on_hosts($hb), "dev1 is on hb"); - - - - diff --git a/t/multiple-hosts-replpol.t b/t/multiple-hosts-replpol.t index 4aa870e6..404fbd2a 100644 --- a/t/multiple-hosts-replpol.t +++ b/t/multiple-hosts-replpol.t @@ -69,9 +69,11 @@ sub rr { my ($state) = @_; my $ostate = $state; # original - MogileFS::Host->t_wipe_singletons; - MogileFS::Device->t_wipe_singletons; + MogileFS::Factory::Host->t_wipe; + MogileFS::Factory::Device->t_wipe; MogileFS::Config->set_config_no_broadcast("min_free_space", 100); + my $hfac = MogileFS::Factory::Host->get_factory; + my $dfac = MogileFS::Factory::Device->get_factory; my $min = 2; if ($state =~ s/^\bmin=(\d+)\b//) { @@ -90,17 +92,19 @@ sub rr { $opts ||= ""; die "dup host $n" if $hosts->{$n}; - my $h = $hosts->{$n} = MogileFS::Host->of_hostid($n); - $h->t_init($opts || "alive"); + my $h = $hosts->{$n} = $hfac->set({ hostid => $n, + status => ($opts || "alive"), observed_state => "reachable", + hostname => $n }); foreach my $ddecl (split(/\s+/, $devstr)) { $ddecl =~ /^d(\d+)=([_X])(?:,(\w+))?$/ or $parse_error->(); my ($dn, $on_not, $status) = ($1, $2, $3); die "dup device $dn" if $devs->{$dn}; - my $d = $devs->{$dn} = MogileFS::Device->of_devid($dn); - $status ||= "alive"; - $d->t_init($h->id, $status); + my $d = $devs->{$dn} = $dfac->set({ devid => $dn, + hostid => $h->id, observed_state => "writeable", + status => ($status || "alive"), mb_total => 1000, + mb_used => 100, }); if ($on_not eq "X" && $d->dstate->should_have_files) { push @$on_devs, $d; } diff --git a/t/store.t b/t/store.t index 77ec6c3a..593f1ca5 100644 --- a/t/store.t +++ b/t/store.t @@ -17,10 +17,10 @@ if ($sto) { exit 0; } -my $dom = MogileFS::Domain->create("foo"); 
-ok($dom, "created a domain"); -my $cls = $dom->create_class("classA"); -ok($cls, "created a class"); +my $dmid = $sto->create_domain("foo"); +ok($dmid, "created a domain"); +my $clsid = $sto->create_class($dmid, "classA"); +ok($clsid, "created a class"); my $df = MogileFS::DevFID->new(100, 200); ok($df, "made devfid"); @@ -42,9 +42,9 @@ is(scalar @on, 2, "FID 101 on 2 devices"); { my $fidid = $sto->register_tempfile( fid => undef, - dmid => $dom->id, + dmid => $dmid, key => "my_tempfile", - classid => $cls->classid, + classid => $clsid, devids => join(',', 1,2,3), ); ok($fidid, "got a fidid"); @@ -52,9 +52,9 @@ is(scalar @on, 2, "FID 101 on 2 devices"); my $fidid2 = eval { $sto->register_tempfile( fid => $fidid, - dmid => $dom->id, + dmid => $dmid, key => "my_tempfile", - classid => $cls->classid, + classid => $clsid, devids => join(',', 1,2,3), ); }; From 64e2b37f23fc38fa39f73b3ceb1b60fa6848ede2 Mon Sep 17 00:00:00 2001 From: dormando Date: Wed, 1 Jun 2011 15:28:48 -0700 Subject: [PATCH 061/405] Update MANIFEST with new files --- MANIFEST | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/MANIFEST b/MANIFEST index 34f17819..84f32ad3 100644 --- a/MANIFEST +++ b/MANIFEST @@ -78,6 +78,11 @@ lib/MogileFS/Store/SQLite.pm lib/MogileFS/Sys.pm lib/MogileFS/Test.pm lib/MogileFS/Util.pm +lib/MogileFS/Factory.pm +lib/MogileFS/Factory/Domain.pm +lib/MogileFS/Factory/Class.pm +lib/MogileFS/Factory/Host.pm +lib/MogileFS/Factory/Device.pm lib/MogileFS/Worker.pm lib/MogileFS/Worker/Delete.pm lib/MogileFS/Worker/Fsck.pm @@ -104,12 +109,12 @@ mogdbsetup mogilefsd mogstored t/00-startup.t +t/01-domain-class.t +t/02-host-device.t t/10-weighting.t t/20-filepaths.t t/30-rebalance.t -t/domains-classes.t t/fid-stat.t -t/hosts-devices.t t/mogstored-shutdown.t t/multiple-hosts-replpol.t t/replpolicy-parsing.t From 4843d8011391195b89d54dee135c49f6977ac921 Mon Sep 17 00:00:00 2001 From: dormando Date: Wed, 1 Jun 2011 15:32:11 -0700 Subject: [PATCH 062/405] Re-enable 
test write throttling. --- lib/MogileFS/Worker/Monitor.pm | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index 30b7d2d8..e43124e0 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -318,8 +318,7 @@ sub check_device { } # next if we're not going to try this now - # FIXME: Uncomment this to throttle test writes again. - #return if ($self->{last_test_write}{$devid} || 0) + UPDATE_DB_EVERY > $now; + return if ($self->{last_test_write}{$devid} || 0) + UPDATE_DB_EVERY > $now; $self->{last_test_write}{$devid} = $now; # now we want to check if this device is writeable From 18d8511099b9de824ad12eff6252dea593785018 Mon Sep 17 00:00:00 2001 From: Jason Mills Date: Fri, 27 May 2011 02:00:49 -0700 Subject: [PATCH 063/405] Add support for config file includes for Mogstored under lighttpd --- lib/Mogstored/HTTPServer.pm | 1 + lib/Mogstored/HTTPServer/Lighttpd.pm | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/lib/Mogstored/HTTPServer.pm b/lib/Mogstored/HTTPServer.pm index 5a8123a0..96661ff8 100644 --- a/lib/Mogstored/HTTPServer.pm +++ b/lib/Mogstored/HTTPServer.pm @@ -7,6 +7,7 @@ sub new { $self->{listen} = delete $opts{listen}; $self->{maxconns} = delete $opts{maxconns}; $self->{bin} = delete $opts{bin}; + $self->{include} = delete $opts{include}; die "unknown opts" if %opts; return $self; } diff --git a/lib/Mogstored/HTTPServer/Lighttpd.pm b/lib/Mogstored/HTTPServer/Lighttpd.pm index f0fd4270..76c492a3 100644 --- a/lib/Mogstored/HTTPServer/Lighttpd.pm +++ b/lib/Mogstored/HTTPServer/Lighttpd.pm @@ -41,6 +41,9 @@ sub start { my $portnum = $self->listen_port; my $bind_ip = $self->bind_ip; + + my $include_line = sprintf('include "%s"', $self->{include}) + if $self->{include}; print $fh qq{ server.document-root = "$self->{docroot}" @@ -49,6 +52,7 @@ server.bind = "$bind_ip" server.modules = ( "mod_webdav", "mod_status" ) webdav.activate = "enable" 
status.status-url = "/" +$include_line }; exec $exe, "-D", "-f", $filename; From b76fff64406dd9ba731f8dbac098361d5193da01 Mon Sep 17 00:00:00 2001 From: "Robin H. Johnson" Date: Thu, 2 Jun 2011 06:16:20 +0000 Subject: [PATCH 064/405] Add support for dbroot{user,pass} during test phases. Signed-off-by: Robin H. Johnson --- lib/MogileFS/Test.pm | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Test.pm b/lib/MogileFS/Test.pm index 75ab2466..fb461170 100644 --- a/lib/MogileFS/Test.pm +++ b/lib/MogileFS/Test.pm @@ -47,6 +47,8 @@ sub temp_store { my $user = $ENV{MOGTEST_DBUSER} || ''; my $pass = $ENV{MOGTEST_DBPASS} || ''; my $name = $ENV{MOGTEST_DBNAME} || ''; + my $rootuser = $ENV{MOGTEST_DBROOTUSER} || ''; + my $rootpass = $ENV{MOGTEST_DBROOTPASS} || ''; # default to mysql, but make sure DBD::MySQL is installed unless ($type) { @@ -61,8 +63,12 @@ sub temp_store { if ($@) { die "Failed to load $store: $@\n"; } - my $sto = $store->new_temp(dbhost => $host, dbport => $port, - dbuser => $user, dbpass => $pass, dbname => $name); + my %opts = ( dbhost => $host, dbport => $port, + dbuser => $user, dbpass => $pass, + dbname => $name); + $opts{dbrootuser} = $rootuser unless $rootuser eq ''; + $opts{dbrootpass} = $rootpass unless $rootpass eq ''; + my $sto = $store->new_temp(%opts); Mgd::set_store($sto); return $sto; } From 336308c429ab98adb66653a3849d6a7baff34b89 Mon Sep 17 00:00:00 2001 From: "Robin H. Johnson" Date: Thu, 2 Jun 2011 06:18:15 +0000 Subject: [PATCH 065/405] MySQL support for dbroot{user,pass}. Signed-off-by: Robin H. 
Johnson --- lib/MogileFS/Store/MySQL.pm | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/lib/MogileFS/Store/MySQL.pm b/lib/MogileFS/Store/MySQL.pm index 14ec8911..fc14b4ff 100644 --- a/lib/MogileFS/Store/MySQL.pm +++ b/lib/MogileFS/Store/MySQL.pm @@ -191,9 +191,11 @@ sub new_temp { my $port = $args{dbport} || 3306; my $user = $args{dbuser} || 'root'; my $pass = $args{dbpass} || ''; + my $rootuser = $args{dbrootuser} || $args{dbuser} || 'root'; + my $rootpass = $args{dbrootpass} || $args{dbpass} || ''; my $sto = MogileFS::Store->new_from_dsn_user_pass("DBI:mysql:database=$dbname;host=$host;port=$port", - $user, $pass); + $rootuser, $rootpass); my $dbh = $sto->dbh; _create_mysql_db($dbh, $dbname); @@ -202,10 +204,17 @@ sub new_temp { $ENV{USE_UNSAFE_MYSQL} = 1 unless defined $ENV{USE_UNSAFE_MYSQL}; system("$FindBin::Bin/../mogdbsetup", "--yes", "--dbname=$dbname", - "--dbhost=$host", "--dbport=$port", "--dbrootuser=$user", - "--dbrootpass=$pass", "--dbuser=$user", "--dbpass=$pass") + "--dbhost=$host", "--dbport=$port", "--dbrootuser=$rootuser", + "--dbrootpass=$rootpass", "--dbuser=$user", "--dbpass=$pass") and die "Failed to run mogdbsetup ($FindBin::Bin/../mogdbsetup)."; + if($user ne $rootuser) { + $sto = MogileFS::Store->new_from_dsn_user_pass( + "DBI:mysql:database=$dbname;host=$host;port=$port", + $user, $pass); + $dbh = $sto->dbh; + } + $dbh->do("use $dbname"); return $sto; } From fcfea3ceb61904045f9d24812aaf3e7778efec41 Mon Sep 17 00:00:00 2001 From: "Robin H. Johnson" Date: Thu, 2 Jun 2011 08:02:52 +0000 Subject: [PATCH 066/405] Actually use the MOGTEST_* variables for Postgres testing... Signed-off-by: Robin H. 
Johnson --- TESTING | 12 +++++++++++- lib/MogileFS/Store/Postgres.pm | 36 ++++++++++++++++++++++++---------- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/TESTING b/TESTING index 28dbd4d7..03a9be4d 100644 --- a/TESTING +++ b/TESTING @@ -10,12 +10,13 @@ Testing under Postgresql: ------------------------- Database setup: # createuser -SRlD mogile -# createdb -E UTF8 -O mogile tmp_mogiletest +# createdb --template template0 -E UTF8 -O mogile tmp_mogiletest To run the tests: # make test \ MOGTEST_DBUSER=mogile \ MOGTEST_DBNAME=tmp_mogiletest \ + MOGTEST_DBROOTUSER=postgres \ MOGTEST_DBTYPE=Postgres Testing under SQLite: @@ -24,3 +25,12 @@ Testing under SQLite: MOGTEST_DBUSER=mogile \ MOGTEST_DBNAME=tmp_mogiletest \ MOGTEST_DBTYPE=SQLite + +Available test variables: +------------------------- +MOGTEST_DBNAME +MOGTEST_DBTYPE +MOGTEST_DBUSER +MOGTEST_DBPASS +MOGTEST_DBROOTUSER +MOGTEST_DBROOTPASS diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index 9f27b067..0bd5b5a9 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -470,26 +470,42 @@ sub fid_type { # -------------------------------------------------------------------------- sub new_temp { - my $dbname = "tmp_mogiletest"; - _drop_db($dbname); - - system("$FindBin::Bin/../mogdbsetup", "--yes", "--dbname=$dbname", "--type=Postgres", "--dbrootuser=postgres") - and die "Failed to run mogdbsetup ($FindBin::Bin/../mogdbsetup)."; + my $self = shift; + my %args = @_; + my $dbname = $args{dbname} || "tmp_mogiletest"; + my $user = $args{dbuser} || 'mogile'; + my $pass = $args{dbpass} || ''; + my $rootuser = $args{dbrootuser} || $args{dbuser} || 'postgres'; + my $rootpass = $args{dbrootpass} || $args{dbpass} || ''; + _drop_db($dbname,$rootuser,$rootpass); + + my @args = ( "$FindBin::Bin/../mogdbsetup", "--yes", + "--dbname=$dbname", "--type=Postgres", + "--dbuser=$user", + "--dbrootuser=$rootuser", ); + push @args, "--dbpass=$pass" unless $pass eq ''; 
+ push @args, "--dbrootpass=$rootpass" unless $rootpass eq ''; + system(@args) + and die "Failed to run mogdbsetup (".join(' ',map { "'".$_."'" } @args).")."; return MogileFS::Store->new_from_dsn_user_pass("dbi:Pg:dbname=$dbname", - "mogile", - ""); + $user, + $pass); } my $rootdbh; sub _root_dbh { - return $rootdbh ||= DBI->connect("DBI:Pg:dbname=postgres", "postgres", "", { RaiseError => 1 }) - or die "Couldn't connect to local PostgreSQL database as postgres"; + my $rootuser = shift; + my $rootpass = shift; + return $rootdbh ||= DBI->connect("DBI:Pg:dbname=postgres", $rootuser, $rootpass, { RaiseError => 1 }) + or die "Couldn't connect to local PostgreSQL database as $rootuser"; } sub _drop_db { my $dbname = shift; - my $root_dbh = _root_dbh(); + my $rootuser = shift; + my $rootpass = shift; + my $root_dbh = _root_dbh($rootuser, $rootpass); eval { $root_dbh->do("DROP DATABASE $dbname"); }; From 1f083a9b0ef5c9af6b9d02558d3bffd4bca6c223 Mon Sep 17 00:00:00 2001 From: "Robin H. Johnson" Date: Thu, 2 Jun 2011 08:04:11 +0000 Subject: [PATCH 067/405] Duplicate the postgres test refactoring into MySQL as well for easier editing later. Signed-off-by: Robin H. Johnson --- lib/MogileFS/Store/MySQL.pm | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/MogileFS/Store/MySQL.pm b/lib/MogileFS/Store/MySQL.pm index fc14b4ff..9bc6f941 100644 --- a/lib/MogileFS/Store/MySQL.pm +++ b/lib/MogileFS/Store/MySQL.pm @@ -203,10 +203,15 @@ sub new_temp { # allow MyISAM in the test suite. 
$ENV{USE_UNSAFE_MYSQL} = 1 unless defined $ENV{USE_UNSAFE_MYSQL}; - system("$FindBin::Bin/../mogdbsetup", "--yes", "--dbname=$dbname", - "--dbhost=$host", "--dbport=$port", "--dbrootuser=$rootuser", - "--dbrootpass=$rootpass", "--dbuser=$user", "--dbpass=$pass") - and die "Failed to run mogdbsetup ($FindBin::Bin/../mogdbsetup)."; + my @args = ("$FindBin::Bin/../mogdbsetup", "--yes", + "--dbname=$dbname", "--type=MySQL", + "--dbhost=$host", "--dbport=$port", + "--dbrootuser=$rootuser", + "--dbuser=$user", ); + push @args, "--dbpass=$pass" unless $pass eq ''; + push @args, "--dbrootpass=$rootpass" unless $rootpass eq ''; + system(@args) + and die "Failed to run mogdbsetup (".join(' ',map { "'".$_."'" } @args).")."; if($user ne $rootuser) { $sto = MogileFS::Store->new_from_dsn_user_pass( From 469817780a20a74f90399b134f6323e4b84ff2b5 Mon Sep 17 00:00:00 2001 From: "Robin H. Johnson" Date: Thu, 2 Jun 2011 08:41:58 +0000 Subject: [PATCH 068/405] Create mogile database explicitly with UTF-8 encoding. Signed-off-by: Robin H. Johnson --- lib/MogileFS/Store/Postgres.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index 0bd5b5a9..25f0ec24 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -34,7 +34,7 @@ sub want_raise_errors { 1 } # if it it's made, or already exists. die otherwise. sub create_db_if_not_exists { my ($pkg, $rdbh, $dbname) = @_; - if(not $rdbh->do("CREATE DATABASE $dbname")) { + if(not $rdbh->do("CREATE DATABASE $dbname TEMPLATE template0 ENCODING 'UTF-8'" )) { die "Failed to create database '$dbname': " . $rdbh->errstr . "\n" if ($rdbh->errstr !~ /already exists/); } } From 4b0925a35d0e486d9d0bcb538c63b34683c95c04 Mon Sep 17 00:00:00 2001 From: "Robin H. 
Johnson" Date: Thu, 2 Jun 2011 09:40:42 +0000 Subject: [PATCH 069/405] Ensure all queues are drained before rebalance tests are started, and then also drain all queues before testing final state. Prevents spurious extra deletes where the file was already moved. Signed-off-by: Robin H. Johnson --- t/30-rebalance.t | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/t/30-rebalance.t b/t/30-rebalance.t index 190398cc..da123862 100644 --- a/t/30-rebalance.t +++ b/t/30-rebalance.t @@ -103,17 +103,22 @@ for my $n (1..$n_files) { pass("Created a ton of files"); # wait for replication to go down +# We need to wait for BOTH queues to be empty before we continue to rebalance. +# If there is anything left w/ a devid that we rebalance away from, there would +# be a failure when the HTTP delete happens simultaenously to the replication. +# This will manifest as subtest 48 failing often... { my $iters = 30; - my $to_repl_rows; + my ($to_repl_rows, $to_queue_rows); while ($iters) { $iters--; $to_repl_rows = $dbh->selectrow_array("SELECT COUNT(*) FROM file_to_replicate"); - last if ! $to_repl_rows; - diag("Files to replicate: $to_repl_rows"); + $to_queue_rows = $dbh->selectrow_array("SELECT COUNT(*) FROM file_to_queue"); + last if $to_repl_rows eq 0 && $to_queue_rows eq 0; + diag("Files to replicate: file_to_replicate=$to_repl_rows file_to_queue=$to_queue_rows"); sleep 1; } - die "Failed to replicate all $n_files files" if $to_repl_rows; + die "Failed to replicate all $n_files files" if $to_repl_rows || $to_queue_rows; pass("Replicated all $n_files files"); } @@ -233,19 +238,21 @@ if ($res) { # print "Start results: ", Dumper($res), "\n\n"; } -sleep 5; +# This sleep is not needed anymore, the rebalance is pretty damn fast. +#sleep 5; { my $iters = 30; - my $to_repl_rows; + my ($to_repl_rows, $to_queue_rows); while ($iters) { $iters--; - $to_repl_rows = $dbh->selectrow_array("SELECT COUNT(*) FROM file_to_queue"); - last if ! 
$to_repl_rows; - diag("Files to rebalance: $to_repl_rows"); + $to_repl_rows = $dbh->selectrow_array("SELECT COUNT(*) FROM file_to_replicate"); + $to_queue_rows = $dbh->selectrow_array("SELECT COUNT(*) FROM file_to_queue"); + last if $to_repl_rows eq 0 && $to_queue_rows eq 0; + diag("Files to rebalance: file_to_replicate=$to_repl_rows file_to_queue=$to_queue_rows"); sleep 1; } - die "Failed to rebalance all files" if $to_repl_rows; + die "Failed to rebalance all files" if $to_repl_rows || $to_queue_rows; pass("Replicated all files"); } From a89457a0224e5e1390cc69b7e058c0de3c4c1723 Mon Sep 17 00:00:00 2001 From: "Robin H. Johnson" Date: Thu, 2 Jun 2011 09:44:35 +0000 Subject: [PATCH 070/405] Update CHANGES. Signed-off-by: Robin H. Johnson --- CHANGES | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGES b/CHANGES index 2b6a3711..9521eab3 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,9 @@ + * New test env variables: MOGTEST_DBROOT{USER,PASS} for easier test usage. (Robin H. Johnson ) + + * Document test variables. (Robin H. Johnson ) + + * Support inclusion of custom lighttpd config. (Jason Mills ) + 2011-01-13: Release version 2.46 * Fix job_master crash under postgres (Robin H. Johnson ) From be27d0236fa3db49c52060466f904b1f1eea2bab Mon Sep 17 00:00:00 2001 From: "Robin H. Johnson" Date: Thu, 2 Jun 2011 09:49:08 +0000 Subject: [PATCH 071/405] More test hints. Signed-off-by: Robin H. Johnson --- TESTING | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/TESTING b/TESTING index 03a9be4d..fa73d2c6 100644 --- a/TESTING +++ b/TESTING @@ -34,3 +34,13 @@ MOGTEST_DBUSER MOGTEST_DBPASS MOGTEST_DBROOTUSER MOGTEST_DBROOTPASS + +Other use test notes: +--------------------- +- Running the tests should be done with equally new versions of MogileFS-Utils + and perl-MogileFS-Client. 
If you have all 3 git repos checked out in + parallel, set: + "PATH=../MogileFS-Utils/:$PATH PERL5LIB=../perl-MogileFS-Client/lib" +- You can control the disk space used for the test mogstored's by setting + TMPDIR. + From 974e942a3cf26623ddacb6704296c92f11f363bf Mon Sep 17 00:00:00 2001 From: dormando Date: Thu, 2 Jun 2011 12:27:28 -0700 Subject: [PATCH 072/405] unfortunately the sleep is gross but necessary :( that is, unless someone fixes it properly :) --- t/30-rebalance.t | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/t/30-rebalance.t b/t/30-rebalance.t index da123862..1d5b560f 100644 --- a/t/30-rebalance.t +++ b/t/30-rebalance.t @@ -238,8 +238,10 @@ if ($res) { # print "Start results: ", Dumper($res), "\n\n"; } -# This sleep is not needed anymore, the rebalance is pretty damn fast. -#sleep 5; +# This sleep should be replaced with a "rebalance status" check to confirm +# it's been started. Otherwise there's up to two seconds where JobMaster might +# not have seen the start request yet. Lowered the sleep from 5 to 3. +sleep 3; { my $iters = 30; From 890c7b3c6bdc66cb9ce447182af6e70276e54161 Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 6 Jun 2011 17:36:02 -0700 Subject: [PATCH 073/405] s/TYPE/ENGINE/ for innodb initialization TYPE=etc has been deprecated forever in MySQL. They're making good on that threat. --- lib/MogileFS/Store/MySQL.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Store/MySQL.pm b/lib/MogileFS/Store/MySQL.pm index 9bc6f941..83c6e606 100644 --- a/lib/MogileFS/Store/MySQL.pm +++ b/lib/MogileFS/Store/MySQL.pm @@ -271,7 +271,7 @@ sub create_table { # don't alter an existing table up to InnoDB from MyISAM... # could be costly. but on new tables, no problem... unless ($existed) { - $dbh->do("ALTER TABLE $table TYPE=InnoDB"); + $dbh->do("ALTER TABLE $table ENGINE=InnoDB"); warn "DBI reported an error of: '" . $dbh->errstr . "' when trying to " . 
"alter table type of $table to InnoDB\n" if $dbh->err; } From 4dff29cf643f15a1814496ed6fba73f0b82e5975 Mon Sep 17 00:00:00 2001 From: dormando Date: Wed, 8 Jun 2011 18:06:13 -0700 Subject: [PATCH 074/405] allow passing dbhost/dbport to tests under Pg --- lib/MogileFS/Store/Postgres.pm | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index 25f0ec24..635b55ec 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -473,14 +473,17 @@ sub new_temp { my $self = shift; my %args = @_; my $dbname = $args{dbname} || "tmp_mogiletest"; + my $host = $args{dbhost} || 'localhost'; + my $port = $args{dbport} || 5432; my $user = $args{dbuser} || 'mogile'; my $pass = $args{dbpass} || ''; my $rootuser = $args{dbrootuser} || $args{dbuser} || 'postgres'; my $rootpass = $args{dbrootpass} || $args{dbpass} || ''; - _drop_db($dbname,$rootuser,$rootpass); + _drop_db($dbname,$host,$port,$rootuser,$rootpass); my @args = ( "$FindBin::Bin/../mogdbsetup", "--yes", - "--dbname=$dbname", "--type=Postgres", + "--dbname=$dbname", "--type=Postgres", + "--dbhost=$host", "--dbport=$port", "--dbuser=$user", "--dbrootuser=$rootuser", ); push @args, "--dbpass=$pass" unless $pass eq ''; @@ -488,24 +491,28 @@ sub new_temp { system(@args) and die "Failed to run mogdbsetup (".join(' ',map { "'".$_."'" } @args).")."; - return MogileFS::Store->new_from_dsn_user_pass("dbi:Pg:dbname=$dbname", + return MogileFS::Store->new_from_dsn_user_pass("dbi:Pg:dbname=$dbname;host=$host;port=$port", $user, $pass); } my $rootdbh; sub _root_dbh { + my $host = shift; + my $port = shift; my $rootuser = shift; my $rootpass = shift; - return $rootdbh ||= DBI->connect("DBI:Pg:dbname=postgres", $rootuser, $rootpass, { RaiseError => 1 }) + return $rootdbh ||= DBI->connect("DBI:Pg:dbname=postgres;host=$host;port=$port", $rootuser, $rootpass, { RaiseError => 1 }) or die "Couldn't connect to local PostgreSQL 
database as $rootuser"; } sub _drop_db { my $dbname = shift; + my $host = shift; + my $port = shift; my $rootuser = shift; my $rootpass = shift; - my $root_dbh = _root_dbh($rootuser, $rootpass); + my $root_dbh = _root_dbh($host, $port, $rootuser, $rootpass); eval { $root_dbh->do("DROP DATABASE $dbname"); }; From ac3a1c5977da46a51a3aa34dbd52b789ea0234b0 Mon Sep 17 00:00:00 2001 From: dormando Date: Wed, 8 Jun 2011 23:24:30 -0700 Subject: [PATCH 075/405] Don't run workers until Monitor runs --- lib/MogileFS/Connection/Client.pm | 14 +++++++++----- lib/MogileFS/ProcManager.pm | 22 ++++++++++++++++++---- lib/MogileFS/Server.pm | 8 +------- lib/MogileFS/Worker/Delete.pm | 3 --- lib/MogileFS/Worker/Fsck.pm | 12 ------------ lib/MogileFS/Worker/Monitor.pm | 10 +++++++++- lib/MogileFS/Worker/Replicate.pm | 11 ----------- 7 files changed, 37 insertions(+), 43 deletions(-) diff --git a/lib/MogileFS/Connection/Client.pm b/lib/MogileFS/Connection/Client.pm index f4a36eb6..bf58222c 100644 --- a/lib/MogileFS/Connection/Client.pm +++ b/lib/MogileFS/Connection/Client.pm @@ -88,12 +88,16 @@ sub handle_admin_command { $count = 500 if $count > 500; # now make sure it's a real job - if (MogileFS::ProcManager->is_valid_job($job)) { - MogileFS::ProcManager->request_job_process($job, $count); - push @out, "Now desiring $count children doing '$job'."; + if (MogileFS::ProcManager->is_monitor_good) { + if (MogileFS::ProcManager->is_valid_job($job)) { + MogileFS::ProcManager->request_job_process($job, $count); + push @out, "Now desiring $count children doing '$job'."; + } else { + my $classes = join(", ", MogileFS::ProcManager->valid_jobs); + push @out, "ERROR: Invalid class '$job'. Valid classes: $classes"; + } } else { - my $classes = join(", ", MogileFS::ProcManager->valid_jobs); - push @out, "ERROR: Invalid class '$job'. 
Valid classes: $classes"; + push @out, "ERROR: Monitor has not completed initial run yet\n"; } } else { push @out, "ERROR: usage: !want "; diff --git a/lib/MogileFS/ProcManager.pm b/lib/MogileFS/ProcManager.pm index f47595ee..23dda01a 100644 --- a/lib/MogileFS/ProcManager.pm +++ b/lib/MogileFS/ProcManager.pm @@ -44,8 +44,7 @@ my @prefork_cleanup; # subrefs to run to clean stuff up before we make a new ch *error = \&Mgd::error; -my %dev_util; # devid -> utilization -my $last_util_spray = 0; # time we lost spread %dev_util to children +my $monitor_good = 0; # ticked after monitor executes once after startup my $nowish; # updated approximately once per second @@ -299,6 +298,10 @@ sub foreach_pending_query { } } +sub is_monitor_good { + return $monitor_good; +} + sub is_valid_job { my ($class, $job) = @_; return defined $jobs{$job}; @@ -311,7 +314,7 @@ sub valid_jobs { sub request_job_process { my ($class, $job, $n) = @_; return 0 unless $class->is_valid_job($job); - return 0 if $job eq 'job_master' && $n > 1; # ghetto special case + return 0 if ($job =~ /^(?:job_master|monitor)$/i && $n > 1); # ghetto special case $jobs{$job}->[0] = $n; $allkidsup = 0; @@ -807,7 +810,18 @@ sub send_to_all_children { sub send_monitor_has_run { my $child = shift; - for my $type (qw(replicate fsck queryworker delete)) { + # Gas up other workers if monitor's completed for the first time. + if (! 
$monitor_good) { + MogileFS::ProcManager->set_min_workers('queryworker' => MogileFS->config('query_jobs')); + MogileFS::ProcManager->set_min_workers('delete' => MogileFS->config('delete_jobs')); + MogileFS::ProcManager->set_min_workers('replicate' => MogileFS->config('replicate_jobs')); + MogileFS::ProcManager->set_min_workers('reaper' => MogileFS->config('reaper_jobs')); + MogileFS::ProcManager->set_min_workers('fsck' => MogileFS->config('fsck_jobs')); + MogileFS::ProcManager->set_min_workers('job_master' => 1); + $monitor_good = 1; + $allkidsup = 0; + } + for my $type (qw(queryworker)) { MogileFS::ProcManager->ImmediateSendToChildrenByJob($type, ":monitor_has_run", $child); } } diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 33ffeec8..cb48ad52 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -104,13 +104,7 @@ sub run { MogileFS::Config->check_database; daemonize() if MogileFS->config("daemonize"); - MogileFS::ProcManager->set_min_workers('queryworker' => MogileFS->config('query_jobs')); - MogileFS::ProcManager->set_min_workers('delete' => MogileFS->config('delete_jobs')); - MogileFS::ProcManager->set_min_workers('replicate' => MogileFS->config('replicate_jobs')); - MogileFS::ProcManager->set_min_workers('reaper' => MogileFS->config('reaper_jobs')); - MogileFS::ProcManager->set_min_workers('monitor' => MogileFS->config('monitor_jobs')); - MogileFS::ProcManager->set_min_workers('fsck' => MogileFS->config('fsck_jobs')); - MogileFS::ProcManager->set_min_workers('job_master' => 1); + MogileFS::ProcManager->set_min_workers('monitor' => 1); # open up our log Sys::Syslog::openlog('mogilefsd', 'pid', 'daemon'); diff --git a/lib/MogileFS/Worker/Delete.pm b/lib/MogileFS/Worker/Delete.pm index 9eb2def7..2f1e28d5 100644 --- a/lib/MogileFS/Worker/Delete.pm +++ b/lib/MogileFS/Worker/Delete.pm @@ -31,9 +31,6 @@ sub work { my $old_queue_check = 0; # next time to check the old queue. 
my $old_queue_backoff = 0; # backoff index - # wait for one pass of the monitor - $self->wait_for_monitor; - while (1) { $self->send_to_parent("worker_bored 50 delete"); $self->read_from_parent(1); diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index 21739e37..b9bcee8e 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -37,8 +37,6 @@ sub watchdog_timeout { 120 } sub work { my $self = shift; - my $run_count = 0; - # this can be CPU-intensive. let's nice ourselves down. POSIX::nice(10); @@ -76,16 +74,6 @@ sub work { $nowish = time(); local $Mgd::nowish = $nowish; - # checking doesn't go well if the monitor job hasn't actively started - # marking things as being available - unless ($self->monitor_has_run) { - # only warn on runs after the first. gives the monitor job some time to work - # before we throw a message. - debug("waiting for monitor job to complete a cycle before beginning") - if $run_count++ > 0; - return; - } - my $queue_todo = $self->queue_todo('fsck'); # This counts the same as a $self->still_alive; $self->send_to_parent('worker_bored 50 fsck'); diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index e43124e0..0ad60d74 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -63,6 +63,10 @@ sub work { } }); + # We announce "monitor_just_ran" every time the device checks are run, but + # only if the DB has been checked inbetween. 
+ my $db_monitor_ran = 0; + my $db_monitor; $db_monitor = sub { $self->parent_ping; @@ -75,7 +79,7 @@ sub work { $self->diff_data($db_data); $self->send_events_to_parent; - $self->send_to_parent(":monitor_just_ran"); + $db_monitor_ran++; Danga::Socket->AddTimer(4, $db_monitor); }; @@ -110,6 +114,10 @@ sub work { $self->send_events_to_parent; $iow->set_hosts(keys %{$self->{seen_hosts}}); + if ($db_monitor_ran) { + $self->send_to_parent(":monitor_just_ran"); + $db_monitor_ran = 0; + } Danga::Socket->AddTimer(2.5, $main_monitor); }; diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index bb55f568..8d9918d4 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -32,18 +32,7 @@ sub watchdog_timeout { 90; } sub work { my $self = shift; - # give the monitor job 15 seconds to give us an update - my $warn_after = time() + 15; - every(1.0, sub { - # replication doesn't go well if the monitor job hasn't actively started - # marking things as being available - unless ($self->monitor_has_run) { - error("waiting for monitor job to complete a cycle before beginning replication") - if time() > $warn_after; - return; - } - $self->send_to_parent("worker_bored 100 replicate rebalance"); my $queue_todo = $self->queue_todo('replicate'); From fc06560001ad93ce21c823954dc5160c16e0450c Mon Sep 17 00:00:00 2001 From: dormando Date: Wed, 15 Jun 2011 17:27:05 -0700 Subject: [PATCH 076/405] Update changelog and Perlbal version dep --- CHANGES | 17 +++++++++++++++++ Makefile.PL | 2 +- debian/control | 2 +- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index 9521eab3..d1922e68 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,20 @@ + * Completely rewire meta object caches. Device/Host/Class/Domain caches are + now asynchronously loaded via the Monitor worker. This should drastically + reduce the number of small queries issued against the master database. 
+ (dormando ) + + * Don't run workers until Monitor runs once (dormando ) + + * s/TYPE/ENGINE/ for innodb initialization (dormando ) + + * Create mogile database explicitly with UTF-8 encoding. (Robin H. Johnson ) + + * Actually use the MOGTEST_* variables for Postgres testing... (Robin H. Johnson ) + + * Add support for config file includes for Mogstored under lighttpd (Jason Mills ) + + * Make postgres use new delete worker code (Robin H. Johnson ) + * New test env variables: MOGTEST_DBROOT{USER,PASS} for easier test usage. (Robin H. Johnson ) * Document test variables. (Robin H. Johnson ) diff --git a/Makefile.PL b/Makefile.PL index cd5af502..b8739d66 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -23,7 +23,7 @@ WriteMakefile( EXE_FILES => $exefiles, PREREQ_PM => { 'Danga::Socket' => '1.56', - 'Perlbal' => '1.76', + 'Perlbal' => '1.79', 'Sys::Syslog' => 0, 'Sys::Syscall' => '0.22', 'Getopt::Long' => 0, diff --git a/debian/control b/debian/control index d415603b..99148aa0 100644 --- a/debian/control +++ b/debian/control @@ -7,7 +7,7 @@ Standards-Version: 3.6.1.0 Package: mogstored Architecture: all -Depends: ${perl:Depends}, libperlbal-perl (>= 1.76), libio-aio-perl, debconf (>= 1.2.0) +Depends: ${perl:Depends}, libperlbal-perl (>= 1.79), libio-aio-perl, debconf (>= 1.2.0) Suggests: mogilefs-utils Description: storage node daemon for MogileFS Mogstored is a storage node daemon for MogileFS, the open-source From e39802ddc2712ae0290ad8ad1a2dea23f3606eb4 Mon Sep 17 00:00:00 2001 From: dormando Date: Wed, 15 Jun 2011 17:29:15 -0700 Subject: [PATCH 077/405] Checking in changes prior to tagging of version 2.50. Changelog diff is: diff --git a/CHANGES b/CHANGES index d1922e6..c2a33f9 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,5 @@ +2011-01-13: Release version 2.50 + * Completely rewire meta object caches. Device/Host/Class/Domain caches are now asynchronously loaded via the Monitor worker. 
This should drastically reduce the number of small queries issued against the master database. --- CHANGES | 2 ++ lib/MogileFS/Server.pm | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index d1922e68..c2a33f90 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,5 @@ +2011-01-13: Release version 2.50 + * Completely rewire meta object caches. Device/Host/Class/Domain caches are now asynchronously loaded via the Monitor worker. This should drastically reduce the number of small queries issued against the master database. diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index cb48ad52..49968ab3 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.46"; +$VERSION = "2.50"; =head1 NAME From 1d3e9138b733cf3080cd856fdbd83a01d9e317d7 Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 27 Jun 2011 11:25:13 -0700 Subject: [PATCH 078/405] Only monitor device if we should monitor it. Fix found by "jed204" on IRC. --- lib/MogileFS/Worker/Monitor.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index 0ad60d74..e3738de7 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -105,7 +105,7 @@ sub work { } $cur_iow->{$dev->id} = $self->{devutil}->{cur}->{$dev->id}; next if $self->{skip_host}{$dev->hostid}; - $self->check_device($dev); + $self->check_device($dev) if $dev->dstate->should_monitor; } $self->{devutil}->{prev} = $cur_iow; From d433a87e016d178643cfe2cd4702ab67bd1464f0 Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 27 Jun 2011 11:32:20 -0700 Subject: [PATCH 079/405] Add minor backcompat for ZoneLocal and friends MogileFS::Device->devices was being called directly within a few replication plugins, so we'll add a compat interface for now. 
We'll kick new releases of related modules and eventually remove the compat interface. --- lib/MogileFS/Device.pm | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/MogileFS/Device.pm b/lib/MogileFS/Device.pm index 09ad5551..fd6364f8 100644 --- a/lib/MogileFS/Device.pm +++ b/lib/MogileFS/Device.pm @@ -291,4 +291,13 @@ sub set_observed_utilization { return 1; } +# Compatibility interface since this old routine is unfortunately called +# internally within plugins. This data should be passed into any hooks which +# may need it? +# Currently an issue with MogileFS::Network + ZoneLocal +# Remove this in 2012. +sub devices { + return Mgd::device_factory()->get_all; +} + 1; From 00e386974b516e9a65e7c41e2504eb9bd37bb981 Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 27 Jun 2011 11:59:04 -0700 Subject: [PATCH 080/405] Checking in changes prior to tagging of version 2.51. Changelog diff is: diff --git a/CHANGES b/CHANGES index c2a33f9..df4564d 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,10 @@ -2011-01-13: Release version 2.50 +2011-06-27: Release version 2.51 + + * Add minor backcompat for ZoneLocal and friends (dormando ) + + * Only monitor a device if we should monitor it. (dormando ) + +2011-06-15: Release version 2.50 * Completely rewire meta object caches. Device/Host/Class/Domain caches are now asynchronously loaded via the Monitor worker. This should drastically --- CHANGES | 8 +++++++- lib/MogileFS/Server.pm | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index c2a33f90..df4564de 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,10 @@ -2011-01-13: Release version 2.50 +2011-06-27: Release version 2.51 + + * Add minor backcompat for ZoneLocal and friends (dormando ) + + * Only monitor a device if we should monitor it. (dormando ) + +2011-06-15: Release version 2.50 * Completely rewire meta object caches. Device/Host/Class/Domain caches are now asynchronously loaded via the Monitor worker. 
This should drastically diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 49968ab3..253f273c 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.50"; +$VERSION = "2.51"; =head1 NAME From 2470024e75eafdcad145de5b63713e5aab5cb757 Mon Sep 17 00:00:00 2001 From: Akzhan Abdulin Date: Thu, 31 Mar 2011 14:19:06 +0400 Subject: [PATCH 081/405] Don't test write on readonly devices --- lib/MogileFS/Worker/Monitor.pm | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index e3738de7..b3476644 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -329,6 +329,13 @@ sub check_device { return if ($self->{last_test_write}{$devid} || 0) + UPDATE_DB_EVERY > $now; $self->{last_test_write}{$devid} = $now; + unless ($dev->can_delete_from) { + # we should not try to write on readonly devices because it can be # mounted as RO. + $self->state_event('device', $devid, {observed_state => 'readable'}) + if (!$dev->observed_readable); + debug("dev$devid: used = $used, total = $total, writeable = 0"); + return; + } # now we want to check if this device is writeable # first, create the test-write directory. this will return From 346676d533d535b06cb1b0401c5e2d2ee5834369 Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 6 Jun 2011 19:03:45 -0700 Subject: [PATCH 082/405] Remove forced loading of Store/MySQL.pm So that line was annoying Postgres/etc users since it forces you to have DBD::mysql. I removed it, but left a note saying that we have to ensure the DB driver gets preloaded before daemonization. It actually does already: ->check_database happens right before the daemonize, which initializes a store, which kicks off the appropriate `use`. So all we had to do all this time was remove the stupid line, sorry. 
--- lib/MogileFS/Server.pm | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 253f273c..fcbacd28 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -78,7 +78,6 @@ use MogileFS::FID; use MogileFS::DevFID; use MogileFS::Store; -use MogileFS::Store::MySQL; # FIXME: don't load this until after reading their config, but before fork. use MogileFS::ReplicationPolicy::MultipleHosts; From 18a40d29236a8a00593f914191c9fd16cda082db Mon Sep 17 00:00:00 2001 From: dormando Date: Wed, 27 Jul 2011 16:01:30 -0700 Subject: [PATCH 083/405] Throw out old HTTPFile->size code and use LWP The old file size check code was a mess. It had misleading errors, such as saying a path failed when it couldn't connect to a side channel. I'm also completely dubious of the continued automatic non-optional use of the size checking sidechannel. Users with nginx or apache backends are better served using HTTP HEAD requests against those, even if they use mogstored for PUT's or usage file generation. --- lib/MogileFS/HTTPFile.pm | 210 +++++---------------------------------- 1 file changed, 26 insertions(+), 184 deletions(-) diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index bc2fec42..23c77730 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -6,11 +6,10 @@ use Socket qw(PF_INET IPPROTO_TCP SOCK_STREAM); use MogileFS::Util qw(error undeferr wait_for_readability wait_for_writeability); # (caching the connection used for HEAD requests) -my %http_socket; # host:port => [$pid, $time, $socket] +my $user_agent; -# get size of file, return 0 on error. -# tries to finish in 2.5 seconds, under the client's default 3 second timeout. (configurable) -my %last_stream_connect_error; # host => $hirestime. +my %size_check_retry_after; # host => $hirestime. +my %size_check_failcount; # host => $count. # create a new MogileFS::HTTPFile instance from a URL. 
not called # "new" because I don't want to imply that it's creating anything. @@ -79,7 +78,6 @@ sub delete { next; } unless ($rescode == 204) { - delete $http_socket{"$host:$port"}; die "Bad response from $host:$port: [$line]"; } $did_del = 1; @@ -92,199 +90,43 @@ sub delete { return 1; } -# returns size of file, (doing a HEAD request and looking at content-length, or side-channel to mogstored) -# returns -1 on file missing (404 or -1 from sidechannel), +# returns size of file, (doing a HEAD request and looking at content-length) +# returns -1 on file missing (404), # returns undef on connectivity error use constant FILE_MISSING => -1; sub size { my $self = shift; my ($host, $port, $uri, $path) = map { $self->{$_} } qw(host port uri url); + return undef if (exists $size_check_retry_after{$host} + && $size_check_retry_after{$host} > Time::HiRes::time()); + # don't SIGPIPE us my $flag_nosignal = MogileFS::Sys->flag_nosignal; local $SIG{'PIPE'} = "IGNORE" unless $flag_nosignal; - # setup for sending size request to cached host - my $req = "size $uri\r\n"; - my $reqlen = length $req; - my $rv = 0; - - my $mogconn = $self->host->mogstored_conn; - my $sock = $mogconn->sock_if_connected; - - my $start_time = Time::HiRes::time(); - - my $httpsock; - my $start_connecting_to_http = sub { - return if $httpsock; # don't allow starting connecting twice - - # try to reuse cached socket - if (my $cached = $http_socket{"$host:$port"}) { - my ($pid, $conntime, $cachesock) = @{ $cached }; - # see if it's still connected - if ($pid == $$ && getpeername($cachesock) && - $conntime > $start_time - 15 && - # readability would indicated conn closed, or garbage: - ! wait_for_readability(fileno($cachesock), 0.00)) - { - $httpsock = $cachesock; - return; - } - } - - socket $httpsock, PF_INET, SOCK_STREAM, IPPROTO_TCP; - IO::Handle::blocking($httpsock, 0); - connect $httpsock, Socket::sockaddr_in($port, Socket::inet_aton($host)); - }; - - # sub to parse the response from $sock. 
returns undef on error, - # or otherwise the size of the $path in bytes. my $node_timeout = MogileFS->config("node_timeout"); - my $stream_response_timeout = 1.0; - my $read_timed_out = 0; - - # returns defined on a real answer (-1 = file missing, >=0 = file length), - # returns undef on connectivity problems. - my $parse_response = sub { - # give the socket 1 second to become readable until we get - # scared of no reply and start connecting to HTTP to do a HEAD - # request. if both timeout, we know the machine is gone, but - # we don't want to wait 2 seconds + 2 seconds... prefer to do - # connects in parallel to reduce overall latency. - unless (wait_for_readability(fileno($sock), $stream_response_timeout)) { - $start_connecting_to_http->(); - # give the socket its final time to get to 2 seconds - # before we really give up on it - unless (wait_for_readability(fileno($sock), $node_timeout - $stream_response_timeout)) { - $read_timed_out = 1; - close($sock); - return undef; - } + # Hardcoded connection cache size of 20 :( + $user_agent ||= LWP::UserAgent->new(timeout => $node_timeout, keep_alive => 20); + my $res = $user_agent->head($path); + if ($res->is_success) { + delete $size_check_failcount{$host} if exists $size_check_failcount{$host}; + return $res->header('content-length'); + } else { + if ($res->code == 404) { + delete $size_check_failcount{$host} if exists $size_check_failcount{$host}; + return FILE_MISSING; } - - # now we know there's readable data (pseudo-gross: we assume - # if we were readable, the whole line is ready. this is a - # poor mix of low-level IO and buffered, blocking stdio but in - # practice it works...) 
- my $line = <$sock>; - return undef unless defined $line; - return undef unless $line =~ /^(\S+)\s+(-?\d+)/; # expected format: "uri size" - return undeferr("get_file_size() requested size of $path, got back size of $1 ($2 bytes)") - if $1 ne $uri; - # backchannel sends back -1 on non-existent file, which we map to the defined value '-1' - return FILE_MISSING if $2 < 0; - # otherwise, return byte size of file - return $2+0; - }; - - my $conn_timeout = MogileFS->config("conn_timeout") || 2; - - # try using the cached socket - if ($sock) { - $rv = send($sock, $req, $flag_nosignal); - if ($!) { - $mogconn->mark_dead; - } elsif ($rv != $reqlen) { - # FIXME: perhaps we shouldn't error here, but instead - # treat the cached socket as bogus and reconnect? never - # seen that happen, though. - return undeferr("send() didn't return expected length ($rv, not $reqlen) for $path"); - } else { - # success - my $size = $parse_response->(); - return $size if defined $size; - $mogconn->mark_dead; + if ($res->message =~ m/connect:/) { + my $count = $size_check_failcount{$host}; + $count ||= 1; + $count *= 2 unless $count > 360; + $size_check_retry_after{$host} = Time::HiRes::time() + $count; + $size_check_failcount{$host} = $count; } + return undeferr("Failed HEAD check for $path (" . $res->code . "): " + . $res->message); } - # try creating a connection to the stream - elsif (($last_stream_connect_error{$host} ||= 0) < $start_time - 15.0) - { - $sock = $mogconn->sock($conn_timeout); - - if ($sock) { - $rv = send($sock, $req, $flag_nosignal); - if ($!) { - return undeferr("error talking to mogstored stream ($path): $!"); - } elsif ($rv != $reqlen) { - return undeferr("send() didn't return expected length ($rv, not $reqlen) for $path"); - } else { - # success - my $size = $parse_response->(); - return $size if defined $size; - $mogconn->mark_dead; - } - } else { - # see if we timed out connecting. 
- my $elapsed = Time::HiRes::time() - $start_time; - if ($elapsed > $conn_timeout - 0.2) { - return undeferr("node $host seems to be down in get_file_size"); - } else { - # cache that we can't connect to the mogstored stream - # port for people using only apache/lighttpd (dav) on - # the storage nodes - $last_stream_connect_error{$host} = Time::HiRes::time(); - } - - } - } - - # failure case: use a HEAD request to get the size of the file: - # give them 2 seconds to connect to server, unless we'd already timed out earlier - my $time_remain = 2.5 - (Time::HiRes::time() - $start_time); - return undeferr("timed out on stream size check of $path, not doing HEAD") - if $time_remain <= 0; - - # try HTTP (this will only work once anyway, if we already started above) - $start_connecting_to_http->(); - - # did we timeout? - unless (wait_for_writeability(fileno($httpsock), $time_remain)) { - return undeferr("get_file_size() connect timeout for HTTP HEAD for size of $path"); - } - - # did we fail to connect? (got a RST, etc) - unless (getpeername($httpsock)) { - return undeferr("get_file_size() connect failure for HTTP HEAD for size of $path"); - } - - $time_remain = 2.5 - (Time::HiRes::time() - $start_time); - return undeferr("no time remaining to write HEAD request to $path") if $time_remain <= 0; - - $rv = syswrite($httpsock, "HEAD $uri HTTP/1.0\r\nConnection: keep-alive\r\n\r\n"); - # FIXME: we don't even look at $rv ? - return undeferr("get_file_size() read timeout ($time_remain) for HTTP HEAD for size of $path") - unless wait_for_readability(fileno($httpsock), $time_remain); - - my $first = <$httpsock>; - return undeferr("get_file_size()'s HEAD request hung up on us") - unless $first; - my ($code) = $first =~ m!^HTTP/1\.\d (\d\d\d)! 
or - return undeferr("HEAD response to get_file_size looks bogus"); - return FILE_MISSING if $code == 404; - return undeferr("get_file_size()'s HEAD request wasn't a 200 OK, got: $code") - unless $code == 200; - - # FIXME: this could block too probably, if we don't get a whole - # line. in practice, all headers will come at once, though in same packet/read. - my $cl = undef; - my $keep_alive = 0; - while (defined (my $line = <$httpsock>)) { - if ($line eq "\r\n") { - if ($keep_alive) { - $http_socket{"$host:$port"} = [ $$, Time::HiRes::time(), $httpsock ]; - } else { - delete $http_socket{"$host:$port"}; - } - return $cl; - } - $cl = $1 if $line =~ /^Content-length: (\d+)/i; - $keep_alive = 1 if $line =~ /^Connection:.+\bkeep-alive\b/i; - } - delete $http_socket{"$host:$port"}; - - # no content length found? - return undeferr("get_file_size() found no content-length header in response for $path"); } - 1; From a534b92f8bec92bb64e5abda0dbeb9d77a94444e Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 10 Dec 2010 01:09:50 -0800 Subject: [PATCH 084/405] Make FSCK worker handle dead devices FSCK would ignore dead states and retry fids forever. It probably didn't care before as the reaper is supposed to remove such references. --- lib/MogileFS/Worker/Fsck.pm | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index b9bcee8e..297546a1 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -108,7 +108,6 @@ sub work { # some connectivity problem... retry this fid later. # (don't dequeue it) $self->still_alive; - sleep 5; next; } $sto->delete_fid_from_file_to_queue($fid->id, FSCK_QUEUE); @@ -210,6 +209,12 @@ sub check_fid { my ($dfid, $disk_size) = @_; if (! defined $disk_size) { my $dev = $dfid->device; + # We end up checking is_perm_dead twice, but that's the way the + # flow goes... 
+ if ($dev->dstate->is_perm_dead) { + $err = "needfix"; + return 0; + } error("Connectivity problem reaching device " . $dev->id . " on host " . $dev->host->ip . "\n"); $err = "stalled"; return 0; @@ -273,6 +278,12 @@ sub fix_fid { my $dev = $dfid->device; next if $already_checked{$dev->id}++; + # Got a dead link, but reaper hasn't cleared it yet? + if ($dev->dstate->is_perm_dead) { + push @bad_devs, $dev; + next; + } + my $disk_size = $self->size_on_disk($dfid); die "dev " . $dev->id . " unreachable" unless defined $disk_size; @@ -420,6 +431,7 @@ sub init_size_checker { # else size of file on disk (after HTTP HEAD or mogstored stat) sub size_on_disk { my ($self, $dfid) = @_; + return undef if $dfid->device->dstate->is_perm_dead; return $dfid->size_on_disk; # Mass checker is disabled for now... doesn't run on our production # hosts due to massive gaps in the fids. Instead we make the process From 0eb4cb5f9f5bcd5618f8df2907b041272c21b7a8 Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 29 Jul 2011 11:25:38 -0700 Subject: [PATCH 085/405] schema version 14; support devs > 16TB mediumint -> int. seems that postgres is a noop. 
--- lib/MogileFS/Store.pm | 9 ++++++--- lib/MogileFS/Store/MySQL.pm | 9 +++++++++ lib/MogileFS/Store/Postgres.pm | 5 +++++ lib/MogileFS/Store/SQLite.pm | 4 ++-- 4 files changed, 22 insertions(+), 5 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index b0ab2b09..0c681075 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -18,7 +18,8 @@ use List::Util (); # 12: adds 'file_to_delete2' table # 13: modifies 'server_settings.value' to TEXT for wider values # also adds a TEXT 'arg' column to file_to_queue for passing arguments -use constant SCHEMA_VERSION => 13; +# 14: modifies 'device' mb_total, mb_used to INT for devs > 16TB +use constant SCHEMA_VERSION => 14; sub new { my ($class) = @_; @@ -435,6 +436,7 @@ sub setup_database { $sto->upgrade_add_class_replpolicy; $sto->upgrade_modify_server_settings_value; $sto->upgrade_add_file_to_queue_arg; + $sto->upgrade_modify_device_size; return 1; } @@ -625,8 +627,8 @@ sub TABLE_device { status ENUM('alive','dead','down'), weight MEDIUMINT DEFAULT 100, - mb_total MEDIUMINT UNSIGNED, - mb_used MEDIUMINT UNSIGNED, + mb_total INT UNSIGNED, + mb_used INT UNSIGNED, mb_asof INT UNSIGNED, PRIMARY KEY (devid), INDEX (status) @@ -719,6 +721,7 @@ sub upgrade_add_device_readonly { 1 } sub upgrade_add_device_drain { die "Not implemented in $_[0]" } sub upgrade_modify_server_settings_value { die "Not implemented in $_[0]" } sub upgrade_add_file_to_queue_arg { die "Not implemented in $_[0]" } +sub upgrade_modify_device_size { die "Not implemented in $_[0]" } sub upgrade_add_class_replpolicy { my ($self) = @_; diff --git a/lib/MogileFS/Store/MySQL.pm b/lib/MogileFS/Store/MySQL.pm index 83c6e606..514663ee 100644 --- a/lib/MogileFS/Store/MySQL.pm +++ b/lib/MogileFS/Store/MySQL.pm @@ -387,6 +387,15 @@ sub upgrade_add_file_to_queue_arg { } } +sub upgrade_modify_device_size { + my $self = shift; + for my $col ('mb_total', 'mb_used') { + if ($self->column_type("device", $col) =~ m/mediumint/i) { + 
$self->dowell("ALTER TABLE device MODIFY COLUMN $col INT UNSIGNED"); + } + } +} + sub pre_daemonize_checks { my $self = shift; # Jay Buffington, from the mailing lists, writes: diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index 635b55ec..aa34da8d 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -304,6 +304,11 @@ sub upgrade_add_file_to_queue_arg { } } +# Postgres doesn't have or never used a MEDIUMINT for device. +sub upgrade_modify_device_size { + return 1; +} + # return 1 on success. die otherwise. sub enqueue_fids_to_delete { # My kingdom for a real INSERT IGNORE implementation! diff --git a/lib/MogileFS/Store/SQLite.pm b/lib/MogileFS/Store/SQLite.pm index 711cccb3..567b2ae1 100644 --- a/lib/MogileFS/Store/SQLite.pm +++ b/lib/MogileFS/Store/SQLite.pm @@ -131,8 +131,8 @@ sub TABLE_device { status ENUM('alive','dead','down','readonly','drain'), weight MEDIUMINT DEFAULT 100, - mb_total MEDIUMINT UNSIGNED, - mb_used MEDIUMINT UNSIGNED, + mb_total INT UNSIGNED, + mb_used INT UNSIGNED, mb_asof INT UNSIGNED, PRIMARY KEY (devid), INDEX (status) From 45709280401f1939ae1d62058ffa5609a04eb7b6 Mon Sep 17 00:00:00 2001 From: Michael Baehr Date: Thu, 2 Jun 2011 12:23:20 -0700 Subject: [PATCH 086/405] Plugins (such as FilePaths) might want to be run before these commands; give them a chance --- lib/MogileFS/Worker/Query.pm | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 79ec23dc..b664d2d0 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -523,6 +523,12 @@ sub cmd_file_debug { $args->{dmid} = $self->check_domain($args) or return $self->err_line('domain_not_found'); return $self->err_line("no_key") unless $args->{key}; + + # now invoke the plugin, abort if it tells us to + my $rv = MogileFS::run_global_hook('cmd_file_debug', $args); + return $self->err_line('plugin_aborted') + if defined $rv && ! 
$rv; + $fid = $sto->file_row_from_dmid_key($args->{dmid}, $args->{key}); return $self->err_line("unknown_key") unless $fid; $fidid = $fid->{fid}; @@ -570,9 +576,15 @@ sub cmd_file_info { my MogileFS::Worker::Query $self = shift; my $args = shift; + # validate domain for plugins $args->{dmid} = $self->check_domain($args) or return $self->err_line('domain_not_found'); + # now invoke the plugin, abort if it tells us to + my $rv = MogileFS::run_global_hook('cmd_file_info', $args); + return $self->err_line('plugin_aborted') + if defined $rv && ! $rv; + # validate parameters my $dmid = $args->{dmid}; my $key = $args->{key} or return $self->err_line("no_key"); From 24b5e6fa477592740388e861d38c006fbc820286 Mon Sep 17 00:00:00 2001 From: dormando Date: Sat, 30 Jul 2011 00:05:07 -0700 Subject: [PATCH 087/405] Checking in changes prior to tagging of version 2.52. Changelog diff is: diff --git a/CHANGES b/CHANGES index df4564d..54166a5 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,17 @@ +2011-07-30: Release version 2.52 + + * Plugins (such as FilePaths) might want to be run in info/debug commands (Michael Baehr ) + + * schema version 14; support devices > 16TB (dormando ) + + * Make FSCK worker handle dead devices (dormando ) + + * Throw out old HTTPFile->size code and use LWP (dormando ) + + * Remove forced loading of DBD::mysql (dormando ) + + * Don't test write on readonly devices (Akzhan Abdulin ) + 2011-06-27: Release version 2.51 * Add minor backcompat for ZoneLocal and friends (dormando ) --- CHANGES | 14 ++++++++++++++ lib/MogileFS/Server.pm | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index df4564de..54166a50 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,17 @@ +2011-07-30: Release version 2.52 + + * Plugins (such as FilePaths) might want to be run in info/debug commands (Michael Baehr ) + + * schema version 14; support devices > 16TB (dormando ) + + * Make FSCK worker handle dead devices (dormando ) + + * Throw out old 
HTTPFile->size code and use LWP (dormando ) + + * Remove forced loading of DBD::mysql (dormando ) + + * Don't test write on readonly devices (Akzhan Abdulin ) + 2011-06-27: Release version 2.51 * Add minor backcompat for ZoneLocal and friends (dormando ) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index fcbacd28..ccb7731a 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.51"; +$VERSION = "2.52"; =head1 NAME From 8b79258a9a88e38ae25aafa94d270d6f1231c5ac Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 12 Aug 2011 00:05:56 -0700 Subject: [PATCH 088/405] Fix host status not updating New internal monitoring was ignoring host updates due to an early return error. This bug is semi serious, as it only effects hosts due to them often having many NULL fields in the database. --- lib/MogileFS/Worker/Monitor.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index b3476644..b7cf4028 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -219,7 +219,7 @@ sub diff_hash { return 1 if (exists $new->{$k} && ! exists $old->{$k}); return 1 if (defined $old->{$k} && ! defined $new->{$k}); return 1 if (defined $new->{$k} && ! defined $old->{$k}); - return 0 if (! defined $new->{$k} && ! defined $old->{$k}); + next if (! defined $new->{$k} && ! defined $old->{$k}); return 1 if ($old->{$k} ne $new->{$k}); } return 0; From ef1d9d860aba1efef900eb56ad64a8b035dc3a29 Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 12 Aug 2011 09:06:03 -0700 Subject: [PATCH 089/405] Checking in changes prior to tagging of version 2.53. 
Changelog diff is: diff --git a/CHANGES b/CHANGES index 54166a5..d992720 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,7 @@ +2011-08-12: Release version 2.53 + + * Fix host status not updating for trackers (important) + 2011-07-30: Release version 2.52 * Plugins (such as FilePaths) might want to be run in info/debug commands (Michael Baehr ) --- CHANGES | 4 ++++ lib/MogileFS/Server.pm | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 54166a50..d9927205 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,7 @@ +2011-08-12: Release version 2.53 + + * Fix host status not updating for trackers (important) + 2011-07-30: Release version 2.52 * Plugins (such as FilePaths) might want to be run in info/debug commands (Michael Baehr ) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index ccb7731a..e873a413 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.52"; +$VERSION = "2.53"; =head1 NAME From 163c7d6de0f690d03d2d41d00c6161fbc4511cdf Mon Sep 17 00:00:00 2001 From: dormando Date: Sat, 3 Sep 2011 14:31:40 -0700 Subject: [PATCH 090/405] Allow chunked PUT's of unlimited size For some reason the Perlbal PUT default is 0 (unlimited), but chunked PUT is 200M. Lets flip the default to 0 in mogstored, so chunked PUT's will work reliably. 
Thanks to victori for pointing out that this setting exists and has a silly default :) --- lib/Mogstored/HTTPServer/Perlbal.pm | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Mogstored/HTTPServer/Perlbal.pm b/lib/Mogstored/HTTPServer/Perlbal.pm index b9e2d760..17f33ae6 100644 --- a/lib/Mogstored/HTTPServer/Perlbal.pm +++ b/lib/Mogstored/HTTPServer/Perlbal.pm @@ -44,6 +44,7 @@ SERVER max_connections = $self->{maxconns} SET mogstored.enable_delete = 1 SET mogstored.min_put_directory = 1 SET mogstored.persist_client = 1 + SET mogstored.max_chunked_request_size = 0 ENABLE mogstored "; From f63f630a71fdb2d938a6871e98cea1a2aaeeae66 Mon Sep 17 00:00:00 2001 From: Tomas Doran Date: Tue, 30 Aug 2011 14:43:59 +0100 Subject: [PATCH 091/405] Fix various issues in the new mogilefs server query worker. 1) Error: Can't locate object method get_by_id via package Mgd::domain_factory (perhaps you forgot to load Mgd::domain_factory?) at /usr/share/perl5/MogileFS/Worker/Query.pm line 639 Fixed by using MogileFS::Server everywhere that calls Mgd::xxxx functions 2) :: [queryworker(10076)] Error running command 'list_fids': Can't locate object method get_by_id via package Mgd::domain_factory (perhaps you forgot to load Mgd::domain_factory?) at /usr/share/perl5/MogileFS/Worker/Query.pm line 639. 
Fixed by changing Mgd::domain_factory->foo to Mgd::domain_factory()->foo --- CHANGES | 2 ++ lib/MogileFS/Config.pm | 1 + lib/MogileFS/DevFID.pm | 1 + lib/MogileFS/Device.pm | 1 + lib/MogileFS/Domain.pm | 1 + lib/MogileFS/FID.pm | 1 + lib/MogileFS/HTTPFile.pm | 1 + lib/MogileFS/ProcManager.pm | 1 + lib/MogileFS/Rebalance.pm | 2 ++ lib/MogileFS/ReplicationRequest.pm | 1 + lib/MogileFS/Util.pm | 1 + lib/MogileFS/Worker/Delete.pm | 1 + lib/MogileFS/Worker/Fsck.pm | 1 + lib/MogileFS/Worker/JobMaster.pm | 1 + lib/MogileFS/Worker/Monitor.pm | 1 + lib/MogileFS/Worker/Query.pm | 5 +++-- lib/MogileFS/Worker/Reaper.pm | 1 + lib/MogileFS/Worker/Replicate.pm | 1 + 18 files changed, 22 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index d9927205..7a8879c1 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,5 @@ + * Fix listing FIDs. + 2011-08-12: Release version 2.53 * Fix host status not updating for trackers (important) diff --git a/lib/MogileFS/Config.pm b/lib/MogileFS/Config.pm index 1861910a..a083a477 100644 --- a/lib/MogileFS/Config.pm +++ b/lib/MogileFS/Config.pm @@ -5,6 +5,7 @@ use MogileFS::ProcManager; use Getopt::Long; use MogileFS::Store; use Sys::Hostname (); +use MogileFS::Server; our @ISA = qw(Exporter); our @EXPORT = qw($DEBUG config set_config FSCK_QUEUE REBAL_QUEUE); diff --git a/lib/MogileFS/DevFID.pm b/lib/MogileFS/DevFID.pm index e480ac37..7e0d5d49 100644 --- a/lib/MogileFS/DevFID.pm +++ b/lib/MogileFS/DevFID.pm @@ -1,6 +1,7 @@ package MogileFS::DevFID; use strict; use warnings; +use MogileFS::Server; use overload '""' => \&as_string; use Carp qw(croak); diff --git a/lib/MogileFS/Device.pm b/lib/MogileFS/Device.pm index fd6364f8..18dbc221 100644 --- a/lib/MogileFS/Device.pm +++ b/lib/MogileFS/Device.pm @@ -2,6 +2,7 @@ package MogileFS::Device; use strict; use warnings; use Carp qw/croak/; +use MogileFS::Server; use MogileFS::Util qw(throw); use MogileFS::Util qw(okay_args device_state error); diff --git a/lib/MogileFS/Domain.pm 
b/lib/MogileFS/Domain.pm index 4c15a421..a1464ebc 100644 --- a/lib/MogileFS/Domain.pm +++ b/lib/MogileFS/Domain.pm @@ -1,6 +1,7 @@ package MogileFS::Domain; use strict; use warnings; +use MogileFS::Server; use MogileFS::Util qw(throw); =head1 diff --git a/lib/MogileFS/FID.pm b/lib/MogileFS/FID.pm index 1aa2d9e7..76af0ad7 100644 --- a/lib/MogileFS/FID.pm +++ b/lib/MogileFS/FID.pm @@ -3,6 +3,7 @@ use strict; use warnings; use Carp qw(croak); use MogileFS::ReplicationRequest qw(rr_upgrade); +use MogileFS::Server; use overload '""' => \&as_string; sub new { diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index 23c77730..65c8ec2d 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -3,6 +3,7 @@ use strict; use warnings; use Carp qw(croak); use Socket qw(PF_INET IPPROTO_TCP SOCK_STREAM); +use MogileFS::Server; use MogileFS::Util qw(error undeferr wait_for_readability wait_for_writeability); # (caching the connection used for HEAD requests) diff --git a/lib/MogileFS/ProcManager.pm b/lib/MogileFS/ProcManager.pm index 23dda01a..669c6e59 100644 --- a/lib/MogileFS/ProcManager.pm +++ b/lib/MogileFS/ProcManager.pm @@ -4,6 +4,7 @@ use warnings; use POSIX qw(:sys_wait_h sigprocmask SIGINT SIG_BLOCK SIG_UNBLOCK); use Symbol; use Socket; +use MogileFS::Server; use MogileFS::Connection::Client; use MogileFS::Connection::Worker; use MogileFS::Util qw(apply_state_events); diff --git a/lib/MogileFS/Rebalance.pm b/lib/MogileFS/Rebalance.pm index c936f044..bd282df4 100644 --- a/lib/MogileFS/Rebalance.pm +++ b/lib/MogileFS/Rebalance.pm @@ -3,6 +3,8 @@ use strict; use warnings; use Carp qw(croak); use List::Util (); +use MogileFS::Server (); + # Note: The filters aren't written for maximum speed, as they're not likely # in the slow path. They're supposed to be readable/extensible. Please don't # cram them down unless you have to. 
diff --git a/lib/MogileFS/ReplicationRequest.pm b/lib/MogileFS/ReplicationRequest.pm index 373d6656..a5b0828d 100644 --- a/lib/MogileFS/ReplicationRequest.pm +++ b/lib/MogileFS/ReplicationRequest.pm @@ -1,5 +1,6 @@ package MogileFS::ReplicationRequest; use strict; +use MogileFS::Server; require Exporter; our @ISA = qw(Exporter); our @EXPORT_OK = qw(rr_upgrade ALL_GOOD TOO_GOOD TEMP_NO_ANSWER); diff --git a/lib/MogileFS/Util.pm b/lib/MogileFS/Util.pm index e908e92a..143f9576 100644 --- a/lib/MogileFS/Util.pm +++ b/lib/MogileFS/Util.pm @@ -2,6 +2,7 @@ package MogileFS::Util; use strict; use Carp qw(croak); use Time::HiRes; +use MogileFS::Server; use MogileFS::Exception; use MogileFS::DeviceState; diff --git a/lib/MogileFS/Worker/Delete.pm b/lib/MogileFS/Worker/Delete.pm index 2f1e28d5..56665bfa 100644 --- a/lib/MogileFS/Worker/Delete.pm +++ b/lib/MogileFS/Worker/Delete.pm @@ -4,6 +4,7 @@ package MogileFS::Worker::Delete; use strict; use base 'MogileFS::Worker'; use MogileFS::Util qw(error); +use MogileFS::Server; # we select 1000 but only do a random 100 of them, to allow # for stateless parallelism diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index 297546a1..9fe082be 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -10,6 +10,7 @@ use fields ( ); use MogileFS::Util qw(every error debug); use MogileFS::Config; +use MogileFS::Server; use List::Util (); use Time::HiRes (); diff --git a/lib/MogileFS/Worker/JobMaster.pm b/lib/MogileFS/Worker/JobMaster.pm index 00a5100f..627b4bce 100644 --- a/lib/MogileFS/Worker/JobMaster.pm +++ b/lib/MogileFS/Worker/JobMaster.pm @@ -14,6 +14,7 @@ use fields ( ); use MogileFS::Util qw(every error debug encode_url_args); use MogileFS::Config; +use MogileFS::Server; use constant DEF_FSCK_QUEUE_MAX => 20_000; use constant DEF_FSCK_QUEUE_INJECT => 1000; diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index b7cf4028..28d7418c 100644 --- 
a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -19,6 +19,7 @@ use Danga::Socket 1.56; use MogileFS::Config; use MogileFS::Util qw(error debug encode_url_args); use MogileFS::IOStatWatcher; +use MogileFS::Server; use constant UPDATE_DB_EVERY => 15; diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index b664d2d0..d4af1174 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -11,6 +11,7 @@ use MogileFS::Util qw(error error_code first weighted_list use MogileFS::HTTPFile; use MogileFS::Rebalance; use MogileFS::Config; +use MogileFS::Server; sub new { my ($class, $psock) = @_; @@ -637,9 +638,9 @@ sub cmd_list_fids { my $fid = $r->{fid}; $ret->{"fid_${ct}_fid"} = $fid; $ret->{"fid_${ct}_domain"} = ($domains{$r->{dmid}} ||= - Mgd::domain_factory->get_by_id($r->{dmid})->name); + Mgd::domain_factory()->get_by_id($r->{dmid})->name); $ret->{"fid_${ct}_class"} = ($classes{$r->{dmid}}{$r->{classid}} ||= - Mgd::class_factory->get_by_id($r->{dmid}, $r->{classid})->name); + Mgd::class_factory()->get_by_id($r->{dmid}, $r->{classid})->name); $ret->{"fid_${ct}_key"} = $r->{dkey}; $ret->{"fid_${ct}_length"} = $r->{length}; $ret->{"fid_${ct}_devcount"} = $r->{devcount}; diff --git a/lib/MogileFS/Worker/Reaper.pm b/lib/MogileFS/Worker/Reaper.pm index 240bbf22..22e52b6a 100644 --- a/lib/MogileFS/Worker/Reaper.pm +++ b/lib/MogileFS/Worker/Reaper.pm @@ -3,6 +3,7 @@ package MogileFS::Worker::Reaper; use strict; use base 'MogileFS::Worker'; +use MogileFS::Server; use MogileFS::Util qw(every error debug); use MogileFS::Config qw(DEVICE_SUMMARY_CACHE_TIMEOUT); diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index 8d9918d4..ed4ab856 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -8,6 +8,7 @@ use fields ( ); use List::Util (); +use MogileFS::Server; use MogileFS::Util qw(error every debug); use MogileFS::Config; use MogileFS::ReplicationRequest 
qw(rr_upgrade); From 68467598cdf41d38f0eaf4676f582b122a8ae913 Mon Sep 17 00:00:00 2001 From: Tomas Doran Date: Tue, 30 Aug 2011 16:02:52 +0100 Subject: [PATCH 092/405] Fix broken logging --- CHANGES | 3 +++ lib/MogileFS/Util.pm | 1 + lib/MogileFS/Worker/Fsck.pm | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 7a8879c1..5e4e8d22 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,7 @@ * Fix listing FIDs. + * Fix fsck fixing a file from logging a message which appears to be + two commands (due to spurious \n). Also fix debug function to + not be able to log messages like this. 2011-08-12: Release version 2.53 diff --git a/lib/MogileFS/Util.pm b/lib/MogileFS/Util.pm index 143f9576..c164a7dd 100644 --- a/lib/MogileFS/Util.pm +++ b/lib/MogileFS/Util.pm @@ -89,6 +89,7 @@ sub every { sub debug { my ($msg, $level) = @_; return unless $Mgd::DEBUG >= 1; + $msg =~ s/[\r\n]+//g; if (my $worker = MogileFS::ProcManager->is_child) { $worker->send_to_parent("debug $msg"); } else { diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index 9fe082be..96b8140a 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -257,7 +257,7 @@ sub parallel_check_sizes { use constant CANT_FIX => 0; sub fix_fid { my ($self, $fid) = @_; - debug(sprintf("Fixing FID %d\n", $fid->id)); + debug(sprintf("Fixing FID %d", $fid->id)); # This should happen first, since the fid gets awkwardly reloaded... $fid->update_devcount; From 1c3d73f1ed9e0ffc9b18cea6047a7adda000d321 Mon Sep 17 00:00:00 2001 From: dormando Date: Sun, 18 Sep 2011 20:06:45 -0700 Subject: [PATCH 093/405] fid size workaround for lighttpd 1.4.x Fixes Issue 39. Lighttpd doesn't return a Content-Length header for zero byte files. So we add a specific workaround for it. If it's not lighttpd, we should still throw errors if that header is missing. 
--- lib/MogileFS/HTTPFile.pm | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index 65c8ec2d..8fbe031a 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -112,7 +112,14 @@ sub size { my $res = $user_agent->head($path); if ($res->is_success) { delete $size_check_failcount{$host} if exists $size_check_failcount{$host}; - return $res->header('content-length'); + my $size = $res->header('content-length'); + if (! defined $size && + $res->header('server') =~ m/^lighttpd/) { + # lighttpd 1.4.x (main release) does not return content-length for + # 0 byte files. + return 0; + } + return $size; } else { if ($res->code == 404) { delete $size_check_failcount{$host} if exists $size_check_failcount{$host}; From 8993d24a17189bfd01258ad676e185484c841e09 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 19 Oct 2011 17:50:23 -0700 Subject: [PATCH 094/405] sqlite: fix mogdbsetup failure on new setups upgrade_modify_device_size() should not fail on new setups. SQLite won't enforce MEDIUMINT vs INT anyways, so it's safe to always continue and store larger INT values in MEDIUMINT columns. 
--- lib/MogileFS/Store/SQLite.pm | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/MogileFS/Store/SQLite.pm b/lib/MogileFS/Store/SQLite.pm index 567b2ae1..a66efb3c 100644 --- a/lib/MogileFS/Store/SQLite.pm +++ b/lib/MogileFS/Store/SQLite.pm @@ -236,6 +236,7 @@ sub upgrade_add_device_drain { } sub upgrade_modify_server_settings_value { 1 } sub upgrade_add_file_to_queue_arg { 1 } +sub upgrade_modify_device_size { 1 } # inefficient, but no warning and no locking sub should_begin_replicating_fidid { From 6f4fe33d395fcd59671e3cd362b3a6cfb3a19a2d Mon Sep 17 00:00:00 2001 From: "git.user" Date: Fri, 28 Oct 2011 17:06:27 -0700 Subject: [PATCH 095/405] Fix mb_free() counting empty device as missing --- lib/MogileFS/Device.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Device.pm b/lib/MogileFS/Device.pm index 18dbc221..b559fc33 100644 --- a/lib/MogileFS/Device.pm +++ b/lib/MogileFS/Device.pm @@ -143,7 +143,7 @@ sub should_get_new_files { sub mb_free { my $self = shift; return $self->{mb_total} - $self->{mb_used} - if $self->{mb_total} && $self->{mb_used}; + if $self->{mb_total} && defined $self->{mb_used}; } sub mb_used { From c4989f582f12950303a1395645dde5471634efb0 Mon Sep 17 00:00:00 2001 From: Daniel Frett Date: Tue, 27 Sep 2011 15:04:45 -0400 Subject: [PATCH 096/405] the exists method no longer exists Update the if condition to match the if condition of the similar code blocks in the Delete Worker --- lib/MogileFS/Worker/Delete.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Delete.pm b/lib/MogileFS/Worker/Delete.pm index 56665bfa..c22a70e6 100644 --- a/lib/MogileFS/Worker/Delete.pm +++ b/lib/MogileFS/Worker/Delete.pm @@ -309,7 +309,7 @@ sub process_deletes { # (Note: we're tolerant of '0' as a devid, due to old buggy version which # would sometimes put that in there) my $dev = $devid ?
Mgd::device_factory()->get_by_id($devid) : undef; - unless ($dev && $dev->exists) { + unless ($dev) { $done_with_devid->("devid_doesnt_exist"); next; } From 5d01811ce47b888ca710985bf31ec45c4b7cadfa Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 28 Oct 2011 19:01:49 -0700 Subject: [PATCH 097/405] Allow default class to be overridden again. Also makes its support slightly more DWIM. If you ask to modify the default class, it will be autoadded to the database if it hasn't yet. Add works as it should. Delete *doesn't work*. Once you add a default class to the database, you're stuck with it. You can continue to change it. We could "delete" it and walk it back to an original form, but it seems more correct to tell people to not do that. --- lib/MogileFS/Store.pm | 9 +++++++-- lib/MogileFS/Worker/Query.pm | 12 +++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 0c681075..5597f0c6 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -776,17 +776,22 @@ sub create_class { my $maxid = $dbh->selectrow_array ('SELECT MAX(classid) FROM class WHERE dmid = ?', undef, $dmid) || 0; + my $clsid = $maxid + 1; + if ($classname eq 'default') { + $clsid = 0; + } + # now insert the new class my $rv = eval { $dbh->do("INSERT INTO class (dmid, classid, classname, mindevcount) VALUES (?, ?, ?, ?)", - undef, $dmid, $maxid + 1, $classname, 2); + undef, $dmid, $clsid, $classname, 2); }; if ($@ || $dbh->err) { if ($self->was_duplicate_error) { throw("dup"); } } - return $maxid + 1 if $rv; + return $clsid if $rv; $self->condthrow; die; } diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index d4af1174..161af279 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -844,8 +844,11 @@ sub cmd_create_class { return $self->err_line('domain_not_found'); my $clsid = $sto->get_classid_by_name($dmid, $class); + if (!defined $clsid && $args->{update} && $class eq 
'default') { + $args->{update} = 0; + } if ($args->{update}) { - return $self->err_line('class_not_found') if ! $clsid; + return $self->err_line('class_not_found') if ! defined $clsid; $sto->update_class_name(dmid => $dmid, classid => $clsid, classname => $class); } else { @@ -884,11 +887,13 @@ sub cmd_delete_class { my $class = $args->{class}; return $self->err_line('no_class') unless length $domain; + return $self->err_line('nodel_default_class') if $class eq 'default'; + my $sto = Mgd::get_store(); my $dmid = $sto->get_domainid_by_name($domain) or return $self->err_line('domain_not_found'); - my $clsid = $sto->get_classid_by_name($dmid, $class) or - return $self->err_line('class_not_found'); + my $clsid = $sto->get_classid_by_name($dmid, $class); + return $self->err_line('class_not_found') unless defined $clsid; if (eval { Mgd::get_store()->delete_class($dmid, $clsid) }) { return $self->ok_line({ domain => $domain, class => $class }); @@ -1715,6 +1720,7 @@ sub err_line { 'rebal_not_started' => "Rebalance not running", 'no_rebal_state' => "No available rebalance status", 'no_rebal_policy' => "No rebalance policy available", + 'nodel_default_class' => "Cannot delete the default class", }->{$err_code} || $err_code; my $delay = ''; From fd238cd820dc4f7983c121bbce6b7388f1dc2bd5 Mon Sep 17 00:00:00 2001 From: dormando Date: Sat, 29 Oct 2011 15:17:53 -0700 Subject: [PATCH 098/405] Issue 36: Further insulated from autoinc resets If MySQL is restarted and tempfile loses its autoinc counter, normally mogilefs will detect that a new fid insertion is a duplicate and resets it to MAX(fid) FROM file. However this doesn't work as well when your low fids may be deleted. So now, trackers check MAX(fid) on start and refuse to go below that. Needs further work but this will go a long way. 
--- lib/MogileFS/Config.pm | 12 ++++++++++++ lib/MogileFS/Store.pm | 5 ++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Config.pm b/lib/MogileFS/Config.pm index a083a477..8f215c49 100644 --- a/lib/MogileFS/Config.pm +++ b/lib/MogileFS/Config.pm @@ -42,6 +42,7 @@ sub set_config_no_broadcast { } set_config('default_mindevcount', 2); +set_config('min_fidid', 0); our ( %cmdline, @@ -236,6 +237,17 @@ Details: [sto=$sto, err=$@] } $sto->pre_daemonize_checks; + + # If MySQL gets restarted InnoDB may reset its auto_increment counter. If + # the first few fids have been deleted, the "reset to max on duplicate" + # code won't fire immediately. + # Instead, we also trigger it if a configured "min_fidid" is higher than + # what we got from innodb. + # For bonus points: This value should be periodically updated, in case the + # trackers don't go down as often as the database. + my $min_fidid = $sto->max_fidid; + $min_fidid = 0 unless $min_fidid; + set_config('min_fidid', $min_fidid); } # set_server_setting( key, value ) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 5597f0c6..7171deaf 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -969,8 +969,11 @@ sub register_tempfile { return $exists ? 1 : 0; }; + # See notes in MogileFS::Config->check_database + my $min_fidid = MogileFS::Config->config('min_fidid'); + # if the fid is in use, do something - while ($fid_in_use->($fid)) { + while ($fid_in_use->($fid) || $fid <= $min_fidid) { throw("dup") if $explicit_fid_used; # be careful of databases which reset their From 6f29c3036ad64274305afc58bd3a841a8062775c Mon Sep 17 00:00:00 2001 From: dormando Date: Sat, 29 Oct 2011 15:28:09 -0700 Subject: [PATCH 099/405] Checking in changes prior to tagging of version 2.54. 
Changelog diff is: diff --git a/CHANGES b/CHANGES index 5e4e8d2..79e8a7c 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,21 @@ +2011-10-29: Release version 2.54 + + * Issue 36: Further insulate from mysql restarts resetting the fid counter (dormando ) + + * Allow default class to be overridden again. (dormando ) + + * Fix crash in old delete worker code (Daniel Frett ) + + * Fix mb_free() counting empty device as missing (git.user ) + + * sqlite: fix mogdbsetup failure on new setups (Eric Wong ) + + * fid size workaround for lighttpd 1.4.x (dormando ) + + * Fix various issues in the new mogilefs server query worker. (Tomas Doran ) + + * Allow chunked PUT's of unlimited size (dormando ) + * Fix listing FIDs. * Fix fsck fixing a file from logging a message which appears to be two commands (due to spurious \n). Also fix debug function to --- CHANGES | 18 ++++++++++++++++++ lib/MogileFS/Server.pm | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 5e4e8d22..79e8a7c5 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,21 @@ +2011-10-29: Release version 2.54 + + * Issue 36: Further insulate from mysql restarts resetting the fid counter (dormando ) + + * Allow default class to be overridden again. (dormando ) + + * Fix crash in old delete worker code (Daniel Frett ) + + * Fix mb_free() counting empty device as missing (git.user ) + + * sqlite: fix mogdbsetup failure on new setups (Eric Wong ) + + * fid size workaround for lighttpd 1.4.x (dormando ) + + * Fix various issues in the new mogilefs server query worker. (Tomas Doran ) + + * Allow chunked PUT's of unlimited size (dormando ) + * Fix listing FIDs. * Fix fsck fixing a file from logging a message which appears to be two commands (due to spurious \n). 
Also fix debug function to diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index e873a413..fb4ece34 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.53"; +$VERSION = "2.54"; =head1 NAME From a29272e1fd2433d13eae7c236bf779483d82e75b Mon Sep 17 00:00:00 2001 From: dormando Date: Sat, 12 Nov 2011 23:22:42 -0800 Subject: [PATCH 100/405] Wrap the queue tx with a mutex lock under mysql MySQL versions are very deadlocky with this pattern, unfortunately. Worse yet, under 5.1+ INSERT's can block forever so long as the SELECT's overlap. Wrapping it in a lock to serialize the transaction (which is fine for performance, anyway) stops the INSERT's from blocking and reduces the deadlocks a little. --- lib/MogileFS/Store.pm | 8 ++++++++ lib/MogileFS/Store/MySQL.pm | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 7171deaf..3eba758b 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1587,6 +1587,8 @@ sub grab_queue_chunk { my $tries = 3; my $work; + return 0 unless $self->lock_queue($queue); + my $extwhere = shift || ''; my $fields = 'fid, nexttry, failcount'; $fields .= ', ' . $extfields if $extfields; @@ -1613,6 +1615,7 @@ sub grab_queue_chunk { $dbh->do("UPDATE $queue SET nexttry = $ut + 1000 WHERE fid IN ($fidlist)"); $dbh->commit; }; + $self->unlock_queue($queue); if ($self->was_deadlock_error) { eval { $dbh->rollback }; return (); @@ -1980,6 +1983,11 @@ sub release_lock { die "release_lock not implemented for $self"; } +# MySQL has an issue where you either get excessive deadlocks, or INSERT's +# hang forever around some transactions. Use ghetto locking to cope. 
+sub lock_queue { 1 } +sub unlock_queue { 1 } + # returns up to $limit @fidids which are on provided $devid sub random_fids_on_device { my ($self, $devid, $limit) = @_; diff --git a/lib/MogileFS/Store/MySQL.pm b/lib/MogileFS/Store/MySQL.pm index 514663ee..6b271b44 100644 --- a/lib/MogileFS/Store/MySQL.pm +++ b/lib/MogileFS/Store/MySQL.pm @@ -127,6 +127,18 @@ sub release_lock { return $rv; } +sub lock_queue { + my ($self, $type) = @_; + my $lock = $self->get_lock('mfsd:' . $type, 30); + return $lock ? 1 : 0; +} + +sub unlock_queue { + my ($self, $type) = @_; + my $lock = $self->release_lock('mfsd:' . $type); + return $lock ? 1 : 0; +} + # clears everything from the fsck_log table # return 1 on success. die otherwise. # Under MySQL 4.1+ this is actually fast. From 10f21a3c76d774687b6a6403757327f81739759b Mon Sep 17 00:00:00 2001 From: dormando Date: Sun, 13 Nov 2011 18:42:15 -0800 Subject: [PATCH 101/405] restore new file upload replication priority now that the queues are locked uniquely, and MySQL 5.1+ does better at avoiding deadlocks, return to scheduling new file uploads as "0" (immediate) so reaper work doesn't gum them up. --- lib/MogileFS/Store.pm | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 3eba758b..27b45282 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1325,8 +1325,10 @@ sub update_classid { sub enqueue_for_replication { my ($self, $fidid, $from_devid, $in) = @_; - $in = 0 unless $in; - my $nexttry = $self->unix_timestamp . " + " . int($in); + my $nexttry = 0; + if ($in) { + $nexttry = $self->unix_timestamp . " + " . int($in); + } $self->retry_on_deadlock(sub { $self->insert_ignore("INTO file_to_replicate (fid, fromdevid, nexttry) ". 
From a5005023ca5e9ecf54180e7bd23e5a8432c28136 Mon Sep 17 00:00:00 2001 From: dormando Date: Sun, 13 Nov 2011 21:06:48 -0800 Subject: [PATCH 102/405] stop enforcing a source id on new file upload file_on rows have been added ahead of the replication call since... forever? so just use it. Over-replication bugs on new upload get jammed up due to this. I just can't think of a justification for keeping it the way it was. --- lib/MogileFS/Worker/Query.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 161af279..1f70395a 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -432,7 +432,7 @@ sub cmd_create_close { ); # mark it as needing replicating: - $fid->enqueue_for_replication(from_device => $devid); + $fid->enqueue_for_replication(); if ($fid->update_devcount) { # call the hook - if this fails, we need to back the file out From b04b5a93ce1a1285877aad15f8d6e1801f348097 Mon Sep 17 00:00:00 2001 From: dormando Date: Sun, 13 Nov 2011 21:10:50 -0800 Subject: [PATCH 103/405] schedule fsck replications for the future. --- lib/MogileFS/Worker/Fsck.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index 96b8140a..08676708 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -347,7 +347,7 @@ sub fix_fid { # Note: this will reload devids, if they called 'note_on_device' # or 'forget_about_device' unless ($fid->devids_meet_policy) { - $fid->enqueue_for_replication; + $fid->enqueue_for_replication(in => 1); $fid->fsck_log(EV_RE_REPLICATE); return HANDLED; } From b9958bbc3a07fea8cf143b99ba2740615dd1ed10 Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 14 Nov 2011 14:00:34 -0800 Subject: [PATCH 104/405] return 1 from condthrow if all is well many functions just return from condthrow, but we often check their return values :/ Thanks to Eric Wong for the tip. 
--- lib/MogileFS/Store.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 27b45282..9c449099 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -297,7 +297,7 @@ sub ping { sub condthrow { my ($self, $optmsg) = @_; my $dbh = $self->dbh; - return unless $dbh->err; + return 1 unless $dbh->err; my ($pkg, $fn, $line) = caller; my $msg = "Database error from $pkg/$fn/$line: " . $dbh->errstr; $msg .= ": $optmsg" if $optmsg; From d85bb2c39729bcfc5d65658c0cd2f8e444f3443c Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 14 Nov 2011 14:38:12 -0800 Subject: [PATCH 105/405] More stats to help troubleshoot rebalance Need to be able to see more easily if a rebalance is jammed. Also fix a random warning. --- lib/MogileFS/Rebalance.pm | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lib/MogileFS/Rebalance.pm b/lib/MogileFS/Rebalance.pm index bd282df4..1f928fa5 100644 --- a/lib/MogileFS/Rebalance.pm +++ b/lib/MogileFS/Rebalance.pm @@ -59,6 +59,9 @@ my %default_state = ( time_started => 0, time_finished => 0, time_stopped => 0, + time_latest_run => 0, + time_latest_empty_run => 0, + empty_runs => 0, ); sub new { @@ -165,6 +168,7 @@ sub _parse_settings { # the constraint also serves as a set of defaults. %parsed = %{$constraint} if ($constraint); + return unless $settings; # parse out from a string: key=value key=value for my $tuple (split /\s/, $settings) { my ($key, $value) = split /=/, $tuple; @@ -244,6 +248,12 @@ sub next_fids_to_rebalance { push(@devfids, [$fid->id, $sdev->id, $destdevs]); } + $state->{time_latest_run} = time; + unless (@devfids) { + $state->{empty_runs}++; + $state->{time_latest_empty_run} = time; + } + # return block of fiddev combos. 
return \@devfids; } From 6eff95352710fef92f90204e5eee1f72e584e798 Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 14 Nov 2011 15:32:11 -0800 Subject: [PATCH 106/405] fixed sources in replication are now a suggestion Code used to hard error if a fixed source was missing. That would be useful if, say, FSCK decides that some copies are bad and it should be re-sourced from a particular device. However FSCK can just nuke or mask bad rows and re-run. So... attempt, then give up. We can always revert the behavior if some feature requires it. New file uploads no longer use fixed sources, which gets rid of most of the issue. This change should let existing clusters clean themselves up. --- lib/MogileFS/Worker/Replicate.pm | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index ed4ab856..9d64afa0 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -305,15 +305,14 @@ sub replicate { my $errref = delete $opts{'errref'}; my $no_unlock = delete $opts{'no_unlock'}; - my $sdevid = delete $opts{'source_devid'}; + my $fixed_source = delete $opts{'source_devid'}; my $mask_devids = delete $opts{'mask_devids'} || {}; my $avoid_devids = delete $opts{'avoid_devids'} || {}; my $target_devids = delete $opts{'target_devids'} || []; # inverse of avoid_devids. die "unknown_opts" if %opts; die unless ref $mask_devids eq "HASH"; - # bool: if source was explicitly requested by caller - my $fixed_source = $sdevid ? 1 : 0; + my $sdevid; my $sto = Mgd::get_store(); my $unlock = sub { @@ -383,9 +382,8 @@ sub replicate { return $retunlock->(0, "no_source", "Source is no longer available replicating $fidid") if @on_devs == 0; return $retunlock->(0, "source_down", "No alive devices available replicating $fidid") if @on_up_devid == 0; - # if they requested a specific source, that source must be up. - if ($sdevid && ! 
grep { $_ == $sdevid} @on_up_devid) { - return $retunlock->(0, "source_down", "Requested replication source device $sdevid not available for $fidid"); + if ($fixed_source && ! grep { $_ == $fixed_source } @on_up_devid) { + error("Fixed source dev$fixed_source requested for $fidid but not available. Trying other devices"); } my %dest_failed; # devid -> 1 for each devid we were asked to copy to, but failed. @@ -464,13 +462,17 @@ sub replicate { } # find where we're replicating from - unless ($fixed_source) { + { # TODO: use an observed good device+host as source to start. my @choices = grep { ! $source_failed{$_} } @on_up_devid; return $retunlock->(0, "source_down", "No devices available replicating $fidid") unless @choices; - @choices = List::Util::shuffle(@choices); - MogileFS::run_global_hook('replicate_order_final_choices', $devs, \@choices); - $sdevid = shift @choices; + if ($fixed_source && grep { $_ == $fixed_source } @choices) { + $sdevid = $fixed_source; + } else { + @choices = List::Util::shuffle(@choices); + MogileFS::run_global_hook('replicate_order_final_choices', $devs, \@choices); + $sdevid = shift @choices; + } } my $worker = MogileFS::ProcManager->is_child or die; @@ -490,10 +492,8 @@ sub replicate { if ($copy_err eq "src_error") { $source_failed{$sdevid} = 1; - if ($fixed_source) { - # there can't be any more retries, as this source - # is busted and is the only one we wanted. - return $retunlock->(0, "copy_error", "error copying fid $fidid from devid $sdevid during replication"); + if ($fixed_source && $fixed_source == $sdevid) { + error("Fixed source dev$fixed_source was requested for $fidid but failed: will try other sources"); } } else { From b8fb55a49d060c5099cf1df455c9e3b3934d751a Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 14 Nov 2011 15:38:18 -0800 Subject: [PATCH 107/405] Checking in changes prior to tagging of version 2.55. 
Changelog diff is: diff --git a/CHANGES b/CHANGES index 79e8a7c..d03029f 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,19 @@ +2011-11-14: Release version 2.55 + + * fixed sources in replication are now a suggestion (dormando ) + + * More stats to help troubleshoot rebalance (dormando ) + + * return 1 from condthrow if all is well (dormando ) + + * schedule fsck replications for the future. (dormando ) + + * stop enforcing a source id on new file upload (dormando ) + + * restore new file upload replication priority (dormando ) + + * Prevent queue related MySQL query hangs (dormando ) + 2011-10-29: Release version 2.54 * Issue 36: Further insulate from mysql restarts resetting the fid counter (dormando ) --- CHANGES | 16 ++++++++++++++++ lib/MogileFS/Server.pm | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 79e8a7c5..d03029f9 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,19 @@ +2011-11-14: Release version 2.55 + + * fixed sources in replication are now a suggestion (dormando ) + + * More stats to help troubleshoot rebalance (dormando ) + + * return 1 from condthrow if all is well (dormando ) + + * schedule fsck replications for the future. 
(dormando ) + + * stop enforcing a source id on new file upload (dormando ) + + * restore new file upload replication priority (dormando ) + + * Prevent queue related MySQL query hangs (dormando ) + 2011-10-29: Release version 2.54 * Issue 36: Further insulate from mysql restarts resetting the fid counter (dormando ) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index fb4ece34..f6b3b377 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.54"; +$VERSION = "2.55"; =head1 NAME From 325f50db2d29ba50391de5feed23fcea1d9f5053 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 24 Nov 2011 01:39:18 +0000 Subject: [PATCH 108/405] worker: use timeout with read_from_parent() instead of sleep Since read_from_parent() got an optional timeout to use with select(), we don't have to blindly sleep and use a non-blocking read anymore. --- lib/MogileFS/Worker.pm | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/MogileFS/Worker.pm b/lib/MogileFS/Worker.pm index 93375c4a..1f00c170 100644 --- a/lib/MogileFS/Worker.pm +++ b/lib/MogileFS/Worker.pm @@ -62,9 +62,8 @@ sub forget_that_monitor_has_run { sub wait_for_monitor { my $self = shift; while (! $self->monitor_has_run) { - $self->read_from_parent; + $self->read_from_parent(1); $self->still_alive; - sleep 1; } } From 80a479a1f1e8bfb4ef7e78efb73009bf1d873296 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 24 Nov 2011 01:39:18 +0000 Subject: [PATCH 109/405] queryworker: reimplement "clear_caches" command Technically, it "refreshes" the cache and usage information, but the effect is the same with the new monitor. By swapping do_monitor_round for clear_caches in the tests, tests run about 30s faster on my machine. Additionally, all changes to devices/hosts/domains/classes should refresh the cache on a given worker. 
--- lib/MogileFS/ProcManager.pm | 2 + lib/MogileFS/Worker/Monitor.pm | 93 +++++++++++++++++++++------------- lib/MogileFS/Worker/Query.pm | 26 +++++----- t/00-startup.t | 4 +- t/10-weighting.t | 4 +- t/20-filepaths.t | 2 +- t/30-rebalance.t | 4 +- 7 files changed, 82 insertions(+), 53 deletions(-) diff --git a/lib/MogileFS/ProcManager.pm b/lib/MogileFS/ProcManager.pm index 669c6e59..dd0b389a 100644 --- a/lib/MogileFS/ProcManager.pm +++ b/lib/MogileFS/ProcManager.pm @@ -707,6 +707,8 @@ sub HandleChildRequest { # and this will rebroadcast it to all other children # (including the one that just set it to us, but eh) MogileFS::Config->set_config($1, $2); + } elsif ($cmd =~ /^:refresh_monitor$/) { + MogileFS::ProcManager->ImmediateSendToChildrenByJob("monitor", $cmd); } else { # unknown command my $show = $cmd; diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index 28d7418c..9280380d 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -42,6 +42,50 @@ sub watchdog_timeout { 30; } +sub cache_refresh { + my $self = shift; + + debug("Monitor running; checking DB for updates"); + $self->validate_dbh; + + my $db_data = $self->grab_all_data; + + # Stack diffs to ship back later + $self->diff_data($db_data); + + $self->send_events_to_parent; +} + +sub usage_refresh { + my $self = shift; + + debug("Monitor running; scanning usage files"); + $self->validate_dbh; + + $self->{skip_host} = {}; # hostid -> 1 if already noted dead. + $self->{seen_hosts} = {}; # IP -> 1 + + my $dev_factory = MogileFS::Factory::Device->get_factory(); + + my $cur_iow = {}; + # Run check_devices to test host/devs. diff against old values. 
+ for my $dev ($dev_factory->get_all) { + if (my $state = $self->is_iow_diff($dev)) { + $self->state_event('device', $dev->id, {utilization => $state}); + } + $cur_iow->{$dev->id} = $self->{devutil}->{cur}->{$dev->id}; + next if $self->{skip_host}{$dev->hostid}; + $self->check_device($dev) if $dev->dstate->should_monitor; + } + + $self->{devutil}->{prev} = $cur_iow; + # Set the IOWatcher hosts (once old monitor code has been disabled) + + $self->send_events_to_parent; + + $self->{iow}->set_hosts(keys %{$self->{seen_hosts}}); +} + sub work { my $self = shift; @@ -71,15 +115,7 @@ sub work { my $db_monitor; $db_monitor = sub { $self->parent_ping; - debug("Monitor running; checking DB for updates"); - $self->validate_dbh; - - my $db_data = $self->grab_all_data; - - # Stack diffs to ship back later - $self->diff_data($db_data); - - $self->send_events_to_parent; + $self->cache_refresh; $db_monitor_ran++; Danga::Socket->AddTimer(4, $db_monitor); }; @@ -90,31 +126,7 @@ sub work { my $main_monitor; $main_monitor = sub { $self->parent_ping; - debug("Monitor running; scanning usage files"); - $self->validate_dbh; - - $self->{skip_host} = {}; # hostid -> 1 if already noted dead. - $self->{seen_hosts} = {}; # IP -> 1 - - my $dev_factory = MogileFS::Factory::Device->get_factory(); - - my $cur_iow = {}; - # Run check_devices to test host/devs. diff against old values. 
- for my $dev ($dev_factory->get_all) { - if (my $state = $self->is_iow_diff($dev)) { - $self->state_event('device', $dev->id, {utilization => $state}); - } - $cur_iow->{$dev->id} = $self->{devutil}->{cur}->{$dev->id}; - next if $self->{skip_host}{$dev->hostid}; - $self->check_device($dev) if $dev->dstate->should_monitor; - } - - $self->{devutil}->{prev} = $cur_iow; - # Set the IOWatcher hosts (once old monitor code has been disabled) - - $self->send_events_to_parent; - - $iow->set_hosts(keys %{$self->{seen_hosts}}); + $self->usage_refresh; if ($db_monitor_ran) { $self->send_to_parent(":monitor_just_ran"); $db_monitor_ran = 0; @@ -123,9 +135,22 @@ sub work { }; $main_monitor->(); + Danga::Socket->AddOtherFds($self->psock_fd, sub{ $self->read_from_parent }); Danga::Socket->EventLoop; } +sub process_line { + my MogileFS::Worker::Monitor $self = shift; + my $lineref = shift; + if ($$lineref =~ /^:refresh_monitor$/) { + $self->cache_refresh; + $self->usage_refresh; + $self->send_to_parent(":monitor_just_ran"); + return 1; + } + return 0; +} + # -------------------------------------------------------------------------- # Flattens and flips events up to the parent. Can be huge on startup! 
diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 1f70395a..da876658 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -189,9 +189,11 @@ sub cmd_clear_cache { my MogileFS::Worker::Query $self = shift; my $args = shift; - # TODO: Use this to tell Monitor worker to rebroadcast all state + $self->forget_that_monitor_has_run; + $self->send_to_parent(":refresh_monitor"); + $self->wait_for_monitor; - return $self->ok_line; + return $self->ok_line(@_); } sub cmd_create_open { @@ -475,7 +477,7 @@ sub cmd_updateclass { $fid->enqueue_for_replication(); } - return $self->ok_line; + return $self->cmd_clear_cache; } sub cmd_delete { @@ -772,7 +774,7 @@ sub cmd_create_device { } if (eval { $sto->create_device($devid, $hostid, $status) }) { - return $self->ok_line; + return $self->cmd_clear_cache; } my $errc = error_code($@); @@ -795,7 +797,7 @@ sub cmd_create_domain { return $self->err_line('failure', "$@"); } - return $self->ok_line({ domain => $domain }); + return $self->cmd_clear_cache({ domain => $domain }); } sub cmd_delete_domain { @@ -810,7 +812,7 @@ sub cmd_delete_domain { return $self->err_line('domain_not_found'); if (eval { $sto->delete_domain($dmid) }) { - return $self->ok_line({ domain => $domain }); + return $self->cmd_clear_cache({ domain => $domain }); } my $err = error_code($@); @@ -867,7 +869,7 @@ sub cmd_create_class { replpolicy => $replpolicy) if $replpolicy; # return success - return $self->ok_line({ class => $class, mindevcount => $mindevcount, domain => $domain }); + return $self->cmd_clear_cache({ class => $class, mindevcount => $mindevcount, domain => $domain }); } sub cmd_update_class { @@ -896,7 +898,7 @@ sub cmd_delete_class { return $self->err_line('class_not_found') unless defined $clsid; if (eval { Mgd::get_store()->delete_class($dmid, $clsid) }) { - return $self->ok_line({ domain => $domain, class => $class }); + return $self->cmd_clear_cache({ domain => $domain, class => $class }); } my 
$errc = error_code($@); @@ -943,7 +945,7 @@ sub cmd_create_host { $sto->update_host($hostid, { map { $_ => $args->{$_} } @toupdate }); # return success - return $self->ok_line({ hostid => $hostid, hostname => $hostname }); + return $self->cmd_clear_cache({ hostid => $hostid, hostname => $hostname }); } sub cmd_update_host { @@ -970,7 +972,7 @@ sub cmd_delete_host { $sto->delete_host($hostid); - return $self->ok_line; + return $self->cmd_clear_cache; } sub cmd_get_domains { @@ -1314,7 +1316,7 @@ sub cmd_set_weight { $dev->set_weight($weight); - return $self->ok_line; + return $self->cmd_clear_cache; } sub cmd_set_state { @@ -1338,7 +1340,7 @@ sub cmd_set_state { unless $dev->can_change_to_state($state); Mgd::get_store()->set_device_state($dev->id, $state); - return $self->ok_line; + return $self->cmd_clear_cache; } sub cmd_noop { diff --git a/t/00-startup.t b/t/00-startup.t index 90b632b0..68284bc2 100644 --- a/t/00-startup.t +++ b/t/00-startup.t @@ -108,9 +108,9 @@ ok($tmptrack->mogadm("device", "add", "hostB", 4), "created dev4 on hostB"); { my $was = $be->{timeout}; # can't use local on phash :( $be->{timeout} = 10; - ok($be->do_request("do_monitor_round", {}), "waited for monitor") + ok($be->do_request("clear_cache", {}), "waited for monitor") or die "Failed to wait for monitor"; - ok($be->do_request("do_monitor_round", {}), "waited for monitor") + ok($be->do_request("clear_cache", {}), "waited for monitor") or die "Failed to wait for monitor"; $be->{timeout} = $was; } diff --git a/t/10-weighting.t b/t/10-weighting.t index 5f7c41f5..a97b9287 100644 --- a/t/10-weighting.t +++ b/t/10-weighting.t @@ -80,9 +80,9 @@ ok($tmptrack->mogadm("device", "add", "hostB", 2), "created dev2 on hostB"); { my $was = $be->{timeout}; # can't use local on phash :( $be->{timeout} = 10; - ok($be->do_request("do_monitor_round", {}), "waited for monitor") + ok($be->do_request("clear_cache", {}), "waited for monitor") or die "Failed to wait for monitor"; - 
ok($be->do_request("do_monitor_round", {}), "waited for monitor") + ok($be->do_request("clear_cache", {}), "waited for monitor") or die "Failed to wait for monitor"; $be->{timeout} = $was; } diff --git a/t/20-filepaths.t b/t/20-filepaths.t index 5c9b1c61..96cff2d9 100644 --- a/t/20-filepaths.t +++ b/t/20-filepaths.t @@ -70,7 +70,7 @@ ok($mogc->filepaths_enable, "Filepaths enabled successfully"); { my $was = $be->{timeout}; # can't use local on phash :( $be->{timeout} = 10; - ok($be->do_request("do_monitor_round", {}), "waited for monitor") + ok($be->do_request("clear_cache", {}), "waited for monitor") or die "Failed to wait for monitor"; $be->{timeout} = $was; } diff --git a/t/30-rebalance.t b/t/30-rebalance.t index 1d5b560f..4f19a164 100644 --- a/t/30-rebalance.t +++ b/t/30-rebalance.t @@ -82,9 +82,9 @@ ok($tmptrack->mogadm("device", "add", "hostC", 6), "created dev6 on hostC"); { my $was = $be->{timeout}; # can't use local on phash :( $be->{timeout} = 10; - ok($be->do_request("do_monitor_round", {}), "waited for monitor") + ok($be->do_request("clear_cache", {}), "waited for monitor") or die "Failed to wait for monitor"; - ok($be->do_request("do_monitor_round", {}), "waited for monitor") + ok($be->do_request("clear_cache", {}), "waited for monitor") or die "Failed to wait for monitor"; $be->{timeout} = $was; } From 719334c9a531c3c4c03e616036691209882f0d83 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 24 Nov 2011 04:52:39 +0000 Subject: [PATCH 110/405] t/00-startup.t: remove 5-second sleep It appears to no longer be needed. However, due to potential race conditions and the presence of multiple query workers, we still need to clear_caches in some places. 
--- t/00-startup.t | 3 --- 1 file changed, 3 deletions(-) diff --git a/t/00-startup.t b/t/00-startup.t index 68284bc2..5250d25a 100644 --- a/t/00-startup.t +++ b/t/00-startup.t @@ -276,9 +276,6 @@ ok($tmptrack->mogadm("host", "add", "hostC", "--ip=$hostC_ip", "--status=alive") ok($tmptrack->mogadm("device", "add", "hostC", 5), "created dev5 on hostC"); ok($tmptrack->mogadm("device", "add", "hostC", 6), "created dev6 on hostC"); -# let it be discovered -sleep(5); # FIXME: make an explicit "rescan" or "remonitor" job to mogilefsd, just for test suite - ok($tmptrack->mogadm("device", "mark", "hostB", 3, "dead"), "marked device B/3 dead"); ok($tmptrack->mogadm("device", "mark", "hostB", 4, "dead"), "marked device B/4 dead"); From 42d66ab4856b62049041521f9526f0f39e97bdbb Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 28 Nov 2011 19:13:55 -0800 Subject: [PATCH 111/405] don't clear state caches for cmd_update_class command updates the class attached to a FID, not necessary to clear metadata caches. --- lib/MogileFS/Worker/Query.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index da876658..caca5083 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -477,7 +477,7 @@ sub cmd_updateclass { $fid->enqueue_for_replication(); } - return $self->cmd_clear_cache; + return $self->ok_line; } sub cmd_delete { From 1b4df932aa9435b227d3f27df27a732ed9c5fef3 Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 28 Nov 2011 21:53:27 -0800 Subject: [PATCH 112/405] make reaper wait less time in test mode actually makes anything not wait more time in test mode. since in all automatic test cases we cause clear_cache to run before examining anything. 
--- lib/MogileFS/FID.pm | 7 +++++++ t/00-startup.t | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/lib/MogileFS/FID.pm b/lib/MogileFS/FID.pm index 76af0ad7..0012582b 100644 --- a/lib/MogileFS/FID.pm +++ b/lib/MogileFS/FID.pm @@ -6,6 +6,11 @@ use MogileFS::ReplicationRequest qw(rr_upgrade); use MogileFS::Server; use overload '""' => \&as_string; +BEGIN { + my $testing = $ENV{TESTING} ? 1 : 0; + eval "sub TESTING () { $testing }"; +} + sub new { my ($class, $fidid) = @_; croak("Invalid fidid") unless $fidid; @@ -139,6 +144,8 @@ sub enqueue_for_replication { my $from_dev = delete $opts{from_device}; # devid or Device object croak("Unknown options to enqueue_for_replication") if %opts; my $from_devid = (ref $from_dev ? $from_dev->id : $from_dev) || undef; + # Still schedule for the future, but don't delay long + $in = 1 if (TESTING && $in); Mgd::get_store()->enqueue_for_replication($self->id, $from_devid, $in); } diff --git a/t/00-startup.t b/t/00-startup.t index 5250d25a..906817c6 100644 --- a/t/00-startup.t +++ b/t/00-startup.t @@ -9,6 +9,10 @@ use MogileFS::Server; use MogileFS::Util qw(error_code); use MogileFS::Test; +BEGIN { + $ENV{TESTING} = 1; +} + find_mogclient_or_skip(); # use mogadm to init it, From 64fa8633b929748c99eb637350514eb9d0f00340 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 30 Nov 2011 00:48:20 +0000 Subject: [PATCH 113/405] fix return value of commands that clear_cache The MogileFS::Admin test case cares about the return value from the library when using "create_domain", even if mogadm does not. 
--- lib/MogileFS/Worker/Query.pm | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index caca5083..30ee72a2 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -187,7 +187,6 @@ sub cmd_test { sub cmd_clear_cache { my MogileFS::Worker::Query $self = shift; - my $args = shift; $self->forget_that_monitor_has_run; $self->send_to_parent(":refresh_monitor"); From 5fecab1d06495bd848bf203fa10c2b98e63cfd22 Mon Sep 17 00:00:00 2001 From: dormando Date: Thu, 1 Dec 2011 18:39:37 -0800 Subject: [PATCH 114/405] provide a server setting cache via monitor worker --- lib/MogileFS/Config.pm | 14 ++++++++++++-- lib/MogileFS/Store.pm | 16 ---------------- lib/MogileFS/Util.pm | 7 +++++++ lib/MogileFS/Worker/Monitor.pm | 14 ++++++++++++-- 4 files changed, 31 insertions(+), 20 deletions(-) diff --git a/lib/MogileFS/Config.pm b/lib/MogileFS/Config.pm index 8f215c49..3b9c394b 100644 --- a/lib/MogileFS/Config.pm +++ b/lib/MogileFS/Config.pm @@ -21,6 +21,7 @@ use constant REBAL_QUEUE => 2; use constant DEVICE_SUMMARY_CACHE_TIMEOUT => 15; my %conf; +my %server_settings; sub set_config { shift if @_ == 3; my ($k, $v) = @_; @@ -269,9 +270,18 @@ sub server_setting { return Mgd::get_store()->server_setting($key); } +sub cache_server_setting { + my ($class, $key, $val) = @_; + if (! 
defined $val) { + delete $server_settings{$key} + if exists $server_settings{$key}; + } + $server_settings{$key} = $val; +} + sub server_setting_cached { - my ($class, $key, $timeout) = @_; - return Mgd::get_store()->server_setting_cached($key, $timeout); + my ($class, $key) = @_; + return $server_settings{$key}; } my $memc; diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 9c449099..7a0fe604 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -54,7 +54,6 @@ sub new_from_dsn_user_pass { recheck_req_gen => 0, # incremented generation, of recheck of dbh being requested recheck_done_gen => 0, # once recheck is done, copy of what the request generation was handles_left => 0, # amount of times this handle can still be verified - server_setting_cache => {}, # value-agnostic db setting cache. }, $subclass; $self->init; return $self; @@ -875,21 +874,6 @@ sub server_setting { undef, $key); } -# generic server setting cache. -# note that you can call the same server setting with different timeouts, but -# the timeout specified at the time of ... timeout, wins. -sub server_setting_cached { - my ($self, $key, $timeout) = @_; - $self->{server_setting_cache}->{$key} ||= {val => '', refresh => 0}; - my $cache = $self->{server_setting_cache}->{$key}; - my $now = time(); - if ($now > $cache->{refresh}) { - $cache->{val} = $self->server_setting($key); - $cache->{refresh} = $now + $timeout; - } - return $cache->{val}; -} - sub server_settings { my ($self) = @_; my $ret = {}; diff --git a/lib/MogileFS/Util.pm b/lib/MogileFS/Util.pm index c164a7dd..882d7e1a 100644 --- a/lib/MogileFS/Util.pm +++ b/lib/MogileFS/Util.pm @@ -35,6 +35,13 @@ sub apply_state_events { my $type = delete $args->{ev_type}; my $id = delete $args->{ev_id}; + # This special case feels gross, but that's what it is. + if ($type eq 'srvset') { + my $val = $mode eq 'set' ? 
$args->{value} : undef; + MogileFS::Config->cache_server_setting($args->{field}, $val); + next; + } + my $old = $factories{$type}->get_by_id($id); if ($mode eq 'setstate') { # Host/Device only. diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index 9280380d..924974d5 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -215,7 +215,9 @@ sub diff_data { my $id = $type eq 'domain' ? $item->{dmid} : $type eq 'class' ? $item->{dmid} . '-' . $item->{classid} : $type eq 'host' ? $item->{hostid} - : $type eq 'device' ? $item->{devid} : die "Unknown type"; + : $type eq 'device' ? $item->{devid} + : $type eq 'srvset' ? $item->{field} + : die "Unknown type"; my $old = delete $p_data->{$id}; # Special case: for devices, we don't care if mb_asof changes. # FIXME: Change the grab routine (or filter there?). @@ -262,10 +264,18 @@ sub grab_all_data { while (my ($name, $id) = each %dom) { push(@fixed_dom, { namespace => $name, dmid => $id }); } + + my $set = $sto->server_settings; + my @fixed_set = (); + while (my ($field, $value) = each %$set) { + push(@fixed_set, { field => $field, value => $value }); + } + my %ret = ( domain => \@fixed_dom, class => [$sto->get_all_classes], host => [$sto->get_all_hosts], - device => [$sto->get_all_devices], ); + device => [$sto->get_all_devices], + srvset => \@fixed_set, ); return \%ret; } From 905931a5b5ca0f0a9eae8aa66c469642e7844297 Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 2 Dec 2011 15:34:13 -0800 Subject: [PATCH 115/405] Use cached server settings in more places primarily the "read only" paths of slave lists and memcached lists. secondarily a few random places in JobMaster to avoid poking the DB when we don't have to. 
--- lib/MogileFS/Config.pm | 2 +- lib/MogileFS/Store.pm | 4 ++-- lib/MogileFS/Worker/JobMaster.pm | 14 +++++++------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/MogileFS/Config.pm b/lib/MogileFS/Config.pm index 3b9c394b..35ccc767 100644 --- a/lib/MogileFS/Config.pm +++ b/lib/MogileFS/Config.pm @@ -295,7 +295,7 @@ sub memcache_client { my $now = time(); return $memc if $last_memc_server_fetch > $now - 30; - my @servers = split(/\s*,\s*/, MogileFS::Config->server_setting("memcache_servers") || ""); + my @servers = split(/\s*,\s*/, MogileFS::Config->server_setting_cached("memcache_servers") || ""); $memc->set_servers(\@servers); $last_memc_server_fetch = $now; diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 7a0fe604..d30d341a 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -183,12 +183,12 @@ sub _slaves_list { $self->{slave_list_cachetime} = $now; $self->{slave_list_cache} = []; - my $sk = MogileFS::Config->server_setting('slave_keys') + my $sk = MogileFS::Config->server_setting_cached('slave_keys') or return (); my @ret; foreach my $key (split /\s*,\s*/, $sk) { - my $slave = MogileFS::Config->server_setting("slave_$key"); + my $slave = MogileFS::Config->server_setting_cached("slave_$key"); if (!$slave) { error("key for slave DB config: slave_$key not found in configuration"); diff --git a/lib/MogileFS/Worker/JobMaster.pm b/lib/MogileFS/Worker/JobMaster.pm index 627b4bce..238c67c7 100644 --- a/lib/MogileFS/Worker/JobMaster.pm +++ b/lib/MogileFS/Worker/JobMaster.pm @@ -112,7 +112,7 @@ sub _check_replicate_queues { sub _check_fsck_queues { my $self = shift; my $sto = shift; - my $fhost = MogileFS::Config->server_setting('fsck_host'); + my $fhost = MogileFS::Config->server_setting_cached('fsck_host'); if ($fhost && $fhost eq MogileFS::Config->hostname) { $self->_inject_fsck_queues($sto); } @@ -143,13 +143,13 @@ sub _inject_fsck_queues { $sto->fsck_log_summarize; my $queue_size = 
$sto->file_queue_length(FSCK_QUEUE); my $max_queue = - MogileFS::Config->server_setting_cached('queue_size_for_fsck', 60) || + MogileFS::Config->server_setting_cached('queue_size_for_fsck') || DEF_FSCK_QUEUE_MAX; return if ($queue_size >= $max_queue); my $max_checked = MogileFS::Config->server_setting('fsck_highest_fid_checked') || 0; my $to_inject = - MogileFS::Config->server_setting_cached('queue_rate_for_fsck', 60) || + MogileFS::Config->server_setting_cached('queue_rate_for_fsck') || DEF_FSCK_QUEUE_INJECT; my $fids = $sto->get_fidids_above_id($max_checked, $to_inject); unless (@$fids) { @@ -169,7 +169,7 @@ sub _inject_fsck_queues { sub _check_rebal_queues { my $self = shift; my $sto = shift; - my $rhost = MogileFS::Config->server_setting('rebal_host'); + my $rhost = MogileFS::Config->server_setting_cached('rebal_host'); if ($rhost && $rhost eq MogileFS::Config->hostname) { $self->_inject_rebalance_queues($sto); } @@ -195,12 +195,12 @@ sub _inject_rebalance_queues { my $queue_size = $sto->file_queue_length(REBAL_QUEUE); my $max_queue = - MogileFS::Config->server_setting_cached('queue_size_for_rebal', 60) || + MogileFS::Config->server_setting_cached('queue_size_for_rebal') || DEF_REBAL_QUEUE_MAX; return if ($queue_size >= $max_queue); my $to_inject = - MogileFS::Config->server_setting_cached('queue_rate_for_rebal', 60) || + MogileFS::Config->server_setting_cached('queue_rate_for_rebal') || DEF_REBAL_QUEUE_INJECT; # TODO: Cache the rebal object. Requires explicitly blowing it up at the @@ -262,7 +262,7 @@ sub _inject_rebalance_queues { # fast trying to keep the queue full. 
sub queue_depth_check { my $max_limit = - MogileFS::Config->server_setting_cached('internal_queue_limit', 120) + MogileFS::Config->server_setting_cached('internal_queue_limit') || 500; my ($depth, $limit) = @_; From 8afa0cd8182cb66f5725a071aea38d6ad7473eba Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 2 Dec 2011 15:45:10 -0800 Subject: [PATCH 116/405] Kill some dead code from Fsck worker --- lib/MogileFS/Worker/Fsck.pm | 104 ------------------------------------ 1 file changed, 104 deletions(-) diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index 08676708..71ae28b1 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -3,9 +3,6 @@ package MogileFS::Worker::Fsck; use strict; use base 'MogileFS::Worker'; use fields ( - 'last_stop_check', # unixtime 'should_stop_running' last called - 'last_maxcheck_write', # unixtime maxcheck written - 'size_checker', # subref which, given a DevFID, returns size of file 'opt_nostat', # bool: do we trust mogstoreds? skipping size stats? ); use MogileFS::Util qw(every error debug); @@ -95,8 +92,6 @@ sub work { $self->{opt_nostat} = MogileFS::Config->server_setting('fsck_opt_policy_only') || 0; MogileFS::FID->mass_load_devids(@fids); - $self->init_size_checker(\@fids); - # don't sleep in loop, next round, since we found stuff to work on # this round... 
$sleep_set->(0); @@ -118,35 +113,6 @@ sub work { }); } -# only write to server_settings table our position every 5 seconds -sub set_max_checked_lazy { - my ($self, $nmax) = @_; - return 0 if $nowish < ($self->{last_maxcheck_write} || 0) + 5; - $self->{last_maxcheck_write} = $nowish; - $self->set_max_checked($nmax); -} - -sub set_max_checked { - my ($self, $nmax) = @_; - MogileFS::Config->set_server_setting('fsck_highest_fid_checked', $nmax); -} - -# this version is accurate, -sub should_be_running { - my $self = shift; - my $fhost = MogileFS::Config->server_setting('fsck_host') - or return; - return $fhost eq MogileFS::Config->hostname; -} - -# this version is sloppy, optimized for speed. only checks db every 5 seconds. -sub should_stop_running { - my $self = shift; - return 0 if $nowish < ($self->{last_stop_check} || 0) + 5; - $self->{last_stop_check} = $nowish; - return ! $self->should_be_running; -} - # given a $fid (MogileFS::FID, with pre-populated ->devids data) # return 0 if reachability problems. 
# return 1 if fid was checked (regardless of there being problems or not) @@ -361,72 +327,6 @@ sub fix_fid { return HANDLED; } -sub init_size_checker { - my ($self, $fidlist) = @_; - - $self->still_alive; - - my $lo_fid = $fidlist->[0]->id; - my $hi_fid = $fidlist->[-1]->id; - - my %size; # $devid -> { $fid -> $size } - my %tried_bulkstat; # $devid -> 1 - - $self->{size_checker} = sub { - my $dfid = shift; - my $devid = $dfid->devid; - - if (my $map = $size{$devid}) { - return $map->{$dfid->fidid} || 0; - } - - unless ($tried_bulkstat{$devid}++) { - my $mogconn = $dfid->device->host->mogstored_conn; - my $sock = $mogconn->sock(5); - my $good = 0; - my $unknown_cmd = 0; - if ($sock) { - my $cmd = "fid_sizes $lo_fid-$hi_fid $devid\n"; - print $sock $cmd; - my $map = {}; - while (my $line = <$sock>) { - if ($line =~ /^\./) { - $good = 1; - last; - } elsif ($line =~ /^(\d+)\s+(\d+)\s+(\d+)/) { - my ($res_devid, $res_fid, $size) = ($1, $2, $3); - last unless $res_devid == $devid; - $map->{$res_fid} = $size; - } elsif ($line =~ /^ERR/) { - $unknown_cmd = 1; - last; - } else { - last; - } - } - - # we only update our $nowish (approximate time) lazily, when we - # know time might've advanced (like during potentially slow RPC call) - $nowish = $self->still_alive; - - if ($good) { - $size{$devid} = $map; - return $map->{$dfid->fidid} || 0; - } elsif (!$unknown_cmd) { - # mogstored connection is unknown state... can't - # trust it, so close it. - $mogconn->mark_dead; - } - } - error("fid_sizes mogstored cmd unavailable for dev $devid; using slower method"); - } - - # slow case (not using new command) - $nowish = $self->still_alive; - return $dfid->size_on_disk; - }; -} - # returns 0 on missing, # undef on connectivity error, # else size of file on disk (after HTTP HEAD or mogstored stat) @@ -434,10 +334,6 @@ sub size_on_disk { my ($self, $dfid) = @_; return undef if $dfid->device->dstate->is_perm_dead; return $dfid->size_on_disk; - # Mass checker is disabled for now... 
doesn't run on our production - # hosts due to massive gaps in the fids. Instead we make the process - # parallel and will rework it later. - #return $self->{size_checker}->($dfid); } 1; From 27b709eb9b032164536da39dfc183300d980919a Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 2 Dec 2011 18:22:36 -0800 Subject: [PATCH 117/405] validate_dbh now immediately pings dbh Change all callers to sleep unless having a valid dbh, in case of the master DB going down. This avoids crash spam, but it'll spam error messages as of this commit. --- lib/MogileFS/Server.pm | 10 +++++++++- lib/MogileFS/Worker/Delete.pm | 2 +- lib/MogileFS/Worker/Fsck.pm | 2 +- lib/MogileFS/Worker/JobMaster.pm | 2 +- lib/MogileFS/Worker/Monitor.pm | 10 +++++----- lib/MogileFS/Worker/Replicate.pm | 7 ++----- 6 files changed, 19 insertions(+), 14 deletions(-) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index f6b3b377..18e8e7a6 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -227,7 +227,15 @@ sub server { } # database checking/connecting -sub validate_dbh { Mgd::get_store()->recheck_dbh } +sub validate_dbh { + my $sto = Mgd::get_store(); + $sto->recheck_dbh(); + my $dbh; + eval { $dbh = $sto->dbh }; + # Doesn't matter what the failure was; workers should retry later. 
+ error("Error validating master DB: $@") if $@; + return $dbh; +} sub get_dbh { return Mgd::get_store()->dbh } # the eventual replacement for callers asking for a dbh directly: diff --git a/lib/MogileFS/Worker/Delete.pm b/lib/MogileFS/Worker/Delete.pm index c22a70e6..77a00e9d 100644 --- a/lib/MogileFS/Worker/Delete.pm +++ b/lib/MogileFS/Worker/Delete.pm @@ -35,7 +35,7 @@ sub work { while (1) { $self->send_to_parent("worker_bored 50 delete"); $self->read_from_parent(1); - $self->validate_dbh; + next unless $self->validate_dbh; # call our workers, and have them do things # RETVAL = 0; I think I am done working for now diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index 71ae28b1..b1c42868 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -76,7 +76,7 @@ sub work { # This counts the same as a $self->still_alive; $self->send_to_parent('worker_bored 50 fsck'); return unless @{$queue_todo}; - $self->validate_dbh; + return unless $self->validate_dbh; my @fids = (); while (my $todo = shift @{$queue_todo}) { diff --git a/lib/MogileFS/Worker/JobMaster.pm b/lib/MogileFS/Worker/JobMaster.pm index 238c67c7..feaa9f85 100644 --- a/lib/MogileFS/Worker/JobMaster.pm +++ b/lib/MogileFS/Worker/JobMaster.pm @@ -48,8 +48,8 @@ sub work { every(1, sub { # 'pings' parent and populates all queues. 
+ return unless $self->validate_dbh; $self->send_to_parent("queue_depth all"); - $self->validate_dbh; my $sto = Mgd::get_store(); $self->read_from_parent(1); my $active = 0; diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index 924974d5..37e9f3c7 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -46,7 +46,7 @@ sub cache_refresh { my $self = shift; debug("Monitor running; checking DB for updates"); - $self->validate_dbh; + return unless $self->validate_dbh; my $db_data = $self->grab_all_data; @@ -60,7 +60,7 @@ sub usage_refresh { my $self = shift; debug("Monitor running; scanning usage files"); - $self->validate_dbh; + my $have_dbh = $self->validate_dbh; $self->{skip_host} = {}; # hostid -> 1 if already noted dead. $self->{seen_hosts} = {}; # IP -> 1 @@ -75,7 +75,7 @@ sub usage_refresh { } $cur_iow->{$dev->id} = $self->{devutil}->{cur}->{$dev->id}; next if $self->{skip_host}{$dev->hostid}; - $self->check_device($dev) if $dev->dstate->should_monitor; + $self->check_device($dev, $have_dbh) if $dev->dstate->should_monitor; } $self->{devutil}->{prev} = $cur_iow; @@ -288,7 +288,7 @@ sub ua { } sub check_device { - my ($self, $dev) = @_; + my ($self, $dev, $have_dbh) = @_; my $devid = $dev->id; my $host = $dev->host; @@ -354,7 +354,7 @@ sub check_device { my $last_update = $self->{last_db_update}{$dev->id} || 0; my $next_update = $last_update + UPDATE_DB_EVERY; my $now = time(); - if ($now >= $next_update) { + if ($now >= $next_update && $have_dbh) { Mgd::get_store()->update_device_usage(mb_total => int($total / 1024), mb_used => int($used / 1024), devid => $devid); diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index 9d64afa0..eda2aed7 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -38,12 +38,9 @@ sub work { my $queue_todo = $self->queue_todo('replicate'); my $queue_todo2 = $self->queue_todo('rebalance'); - unless (@$queue_todo || 
@$queue_todo2) { - return; - } + return unless (@$queue_todo || @$queue_todo2); - $self->validate_dbh; - my $dbh = $self->get_dbh or return 0; + return unless $self->validate_dbh; my $sto = Mgd::get_store(); while (my $todo = shift @$queue_todo) { From 46c880c90625e06bf56c10cd38a6b754fb5809d2 Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 2 Dec 2011 18:43:01 -0800 Subject: [PATCH 118/405] don't spew errors if the master goes down complain if the server had gone away after a ping test. workers will also crash if the server dies in the middle of its routine, but not continue to spam. We were also uselessly selecting slave info from the master and slave constantly :( Both of those things are probably also jobs for the monitor worker. --- lib/MogileFS/Server.pm | 3 ++- lib/MogileFS/Store.pm | 2 ++ lib/MogileFS/Store/MySQL.pm | 9 +++++---- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 18e8e7a6..fb9e3c11 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -229,11 +229,12 @@ sub server { # database checking/connecting sub validate_dbh { my $sto = Mgd::get_store(); + my $had_dbh = $sto->have_dbh; $sto->recheck_dbh(); my $dbh; eval { $dbh = $sto->dbh }; # Doesn't matter what the failure was; workers should retry later. 
- error("Error validating master DB: $@") if $@; + error("Error validating master DB: $@") if $@ && $had_dbh; return $dbh; } sub get_dbh { return Mgd::get_store()->dbh } diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index d30d341a..8139cccb 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -288,6 +288,8 @@ sub dbh { return $self->{dbh}; } +sub have_dbh { return 1 if $_[0]->{dbh}; } + sub ping { my $self = shift; return $self->dbh->ping; diff --git a/lib/MogileFS/Store/MySQL.pm b/lib/MogileFS/Store/MySQL.pm index 6b271b44..43ea8a90 100644 --- a/lib/MogileFS/Store/MySQL.pm +++ b/lib/MogileFS/Store/MySQL.pm @@ -88,11 +88,12 @@ sub check_slave { return 1; } - my $master_status = eval { $self->dbh->selectrow_hashref("SHOW MASTER STATUS") }; - warn "Error thrown: '$@' while trying to get master status." if $@; + # FIXME: Only make these calls when we're ready to do something with them. + #my $master_status = eval { $self->dbh->selectrow_hashref("SHOW MASTER STATUS") }; + #warn "Error thrown: '$@' while trying to get master status." if $@; - my $slave_status = eval { $self->{slave}->dbh->selectrow_hashref("SHOW SLAVE STATUS") }; - warn "Error thrown: '$@' while trying to get slave status." if $@; + #my $slave_status = eval { $self->{slave}->dbh->selectrow_hashref("SHOW SLAVE STATUS") }; + #warn "Error thrown: '$@' while trying to get slave status." if $@; # compare contrast, return 0 if not okay. # Master: File Position From b5f00f35e37aae1617c16a39bf29fffef8cc90d0 Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 5 Dec 2011 14:15:32 -0800 Subject: [PATCH 119/405] version the slave settings instead of time-refresh should avoid reparsing the slave settings so often when people typically don't edit them much. the gross hack required for easily adding the version means the number will jump a few times per edit, but that doesn't hurt anything. 
--- lib/MogileFS/Store.pm | 7 ++++--- lib/MogileFS/Worker/Query.pm | 8 ++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 8139cccb..0f381685 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -49,7 +49,7 @@ sub new_from_dsn_user_pass { pass => $pass, max_handles => $max_handles, # Max number of handles to allow raise_errors => $subclass->want_raise_errors, - slave_list_cachetime => 0, + slave_list_version => 0, slave_list_cache => [], recheck_req_gen => 0, # incremented generation, of recheck of dbh being requested recheck_done_gen => 0, # once recheck is done, copy of what the request generation was @@ -177,10 +177,11 @@ sub _slaves_list { my $now = time(); # only reload every 15 seconds. - if ($self->{slave_list_cachetime} > $now - 15) { + my $ver = MogileFS::Config->server_setting_cached('slave_version') || 0; + if ($ver <= $self->{slave_list_version}) { return @{$self->{slave_list_cache}}; } - $self->{slave_list_cachetime} = $now; + $self->{slave_list_version} = $ver; $self->{slave_list_cache} = []; my $sk = MogileFS::Config->server_setting_cached('slave_keys') diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 30ee72a2..1b99aa9c 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -1394,6 +1394,14 @@ sub cmd_set_server_setting { return $self->err_line("invalid_format", $@) if $@; MogileFS::Config->set_server_setting($key, $cleanval); + + # GROSS HACK: slave settings are managed directly by MogileFS::Client, but + # I need to add a version key, so we check and inject that code here. + # FIXME: Move this when slave keys are managed by query worker commands! 
+ if ($key =~ /^slave_/) { + Mgd::get_store()->incr_server_setting('slave_version', 1); + } + return $self->ok_line; } From 893b63d1a0829ec7c4e0ce66f9195e45564c25d9 Mon Sep 17 00:00:00 2001 From: dormando Date: Tue, 6 Dec 2011 03:11:51 -0800 Subject: [PATCH 120/405] improve slave handling code add new hook "slave_list_filter" for ZoneLocal/etc to use for narrowing to local_network slaves. Improve slave handling code; use a random slave each time, but avoid reconnecting all slaves if one dies. Avoid retrying dead slaves for slave_dead_retry_timeout seconds (15s default). Do reconnect all slaves if the configuration changes. --- lib/MogileFS/Store.pm | 66 +++++++++++++++++++++++++++++-------- lib/MogileFS/Store/MySQL.pm | 13 +++----- 2 files changed, 58 insertions(+), 21 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 0f381685..4b06ff51 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -4,7 +4,7 @@ use warnings; use Carp qw(croak); use MogileFS::Util qw(throw max error); use DBI; # no reason a Store has to be DBI-based, but for now they all are. -use List::Util (); +use List::Util qw(shuffle); # this is incremented whenever the schema changes. server will refuse # to start-up with an old schema version @@ -54,6 +54,8 @@ sub new_from_dsn_user_pass { recheck_req_gen => 0, # incremented generation, of recheck of dbh being requested recheck_done_gen => 0, # once recheck is done, copy of what the request generation was handles_left => 0, # amount of times this handle can still be verified + connected_slaves => {}, + dead_slaves => {}, }, $subclass; $self->init; return $self; @@ -171,18 +173,22 @@ sub is_slave { return $self->{slave}; } -# Returns a list of arrayrefs, each being [$dsn, $username, $password] for connecting to a slave DB. -sub _slaves_list { +sub _slaves_list_changed { my $self = shift; - my $now = time(); - - # only reload every 15 seconds. 
my $ver = MogileFS::Config->server_setting_cached('slave_version') || 0; if ($ver <= $self->{slave_list_version}) { - return @{$self->{slave_list_cache}}; + return 0; } $self->{slave_list_version} = $ver; - $self->{slave_list_cache} = []; + # Restart connections from scratch if the configuration changed. + $self->{connected_slaves} = {}; + return 1; +} + +# Returns a list of arrayrefs, each being [$dsn, $username, $password] for connecting to a slave DB. +sub _slaves_list { + my $self = shift; + my $now = time(); my $sk = MogileFS::Config->server_setting_cached('slave_keys') or return (); @@ -208,26 +214,61 @@ sub _slaves_list { return @ret; } +sub _pick_slave { + my $self = shift; + my @temp = shuffle keys %{$self->{connected_slaves}}; + return unless @temp; + return $self->{connected_slaves}->{$temp[0]}; +} + sub get_slave { my $self = shift; + my $now = time(); die "Incapable of having slaves." unless $self->can_do_slaves; - return $self->{slave} if $self->check_slave; + $self->{slave} = undef; + unless ($self->_slaves_list_changed) { + if ($self->{slave} = $self->_pick_slave) { + $self->{slave}->{recheck_req_gen} = $self->{recheck_req_gen}; + return $self->{slave} if $self->check_slave; + } + } + if ($self->{slave}) { + my $dsn = $self->{slave}->{dsn}; + $self->{dead_slaves}->{$dsn} = $now; + delete $self->{connected_slaves}->{$dsn}; + error("Error talking to slave: $dsn"); + } my @slaves_list = $self->_slaves_list; # If we have no slaves, then return silently. 
return unless @slaves_list; + MogileFS::run_global_hook('slave_list_filter', \@slaves_list); + + my $dead_retry = + MogileFS::Config->server_setting_cached('slave_dead_retry_timeout') || 15; + foreach my $slave_fulldsn (@slaves_list) { + my $dead_timeout = $self->{dead_slaves}->{$slave_fulldsn->[0]}; + next if (defined $dead_timeout && $dead_timeout + $dead_retry > $now); + next if ($self->{connected_slaves}->{$slave_fulldsn->[0]}); + my $newslave = $self->{slave} = $self->new_from_dsn_user_pass(@$slave_fulldsn); - $self->{slave_next_check} = 0; + $self->{slave}->{next_check} = 0; $newslave->mark_as_slave; - return $newslave - if $self->check_slave; + if ($self->check_slave) { + $self->{connected_slaves}->{$slave_fulldsn->[0]} = $newslave; + } else { + $self->{dead_slaves}->{$slave_fulldsn->[0]} = $now; + } } + if ($self->{slave} = $self->_pick_slave) { + return $self->{slave}; + } warn "Slave list exhausted, failing back to master."; return; } @@ -239,7 +280,6 @@ sub read_store { if ($self->{slave_ok}) { if (my $slave = $self->get_slave) { - $slave->{recheck_req_gen} = $self->{recheck_req_gen}; return $slave; } } diff --git a/lib/MogileFS/Store/MySQL.pm b/lib/MogileFS/Store/MySQL.pm index 43ea8a90..bb2229a1 100644 --- a/lib/MogileFS/Store/MySQL.pm +++ b/lib/MogileFS/Store/MySQL.pm @@ -82,22 +82,19 @@ sub check_slave { return 0 unless $self->{slave}; - my $next_check = \$self->{slave_next_check}; + my $next_check = \$self->{slave}->{next_check}; if ($$next_check > time()) { return 1; } - # FIXME: Only make these calls when we're ready to do something with them. - #my $master_status = eval { $self->dbh->selectrow_hashref("SHOW MASTER STATUS") }; - #warn "Error thrown: '$@' while trying to get master status." if $@; - #my $slave_status = eval { $self->{slave}->dbh->selectrow_hashref("SHOW SLAVE STATUS") }; #warn "Error thrown: '$@' while trying to get slave status." if $@; - # compare contrast, return 0 if not okay. 
- # Master: File Position - # Slave: + # TODO: Check show slave status *unless* a server setting is present to + # tell us to ignore it (like in a multi-DC setup). + eval { $self->{slave}->dbh }; + return 0 if $@; # call time() again here because SQL blocks. $$next_check = time() + 5; From 2465245c39b97ff859f310ca81dc7a43b585397a Mon Sep 17 00:00:00 2001 From: dormando Date: Tue, 6 Dec 2011 03:21:33 -0800 Subject: [PATCH 121/405] Destroy mogdeps Modules are years out of date, and tend to hurt more than they help now. We should figure out carton/etc. --- MANIFEST | 43 - Makefile.PL | 1 - MogileFS-Server.spec | 4 - debian/rules | 4 - lib/MogileFS/Server.pm | 19 - lib/mogdeps/Danga/Socket.pm | 1437 -------------- lib/mogdeps/Net/Netmask.pm | 583 ------ lib/mogdeps/Perlbal.pm | 1377 -------------- lib/mogdeps/Perlbal/AIO.pm | 284 --- lib/mogdeps/Perlbal/BackendHTTP.pm | 755 -------- lib/mogdeps/Perlbal/Cache.pm | 244 --- lib/mogdeps/Perlbal/ChunkedUploadState.pm | 64 - lib/mogdeps/Perlbal/ClientHTTP.pm | 475 ----- lib/mogdeps/Perlbal/ClientHTTPBase.pm | 905 --------- lib/mogdeps/Perlbal/ClientManage.pm | 150 -- lib/mogdeps/Perlbal/ClientProxy.pm | 1280 ------------- lib/mogdeps/Perlbal/CommandContext.pm | 35 - lib/mogdeps/Perlbal/HTTPHeaders.pm | 483 ----- lib/mogdeps/Perlbal/ManageCommand.pm | 104 - lib/mogdeps/Perlbal/Plugin/AccessControl.pm | 201 -- lib/mogdeps/Perlbal/Plugin/AtomInject.pm | 65 - lib/mogdeps/Perlbal/Plugin/AtomStream.pm | 138 -- .../Perlbal/Plugin/AutoRemoveLeadingDir.pm | 51 - lib/mogdeps/Perlbal/Plugin/Cgilike.pm | 343 ---- lib/mogdeps/Perlbal/Plugin/EchoService.pm | 123 -- lib/mogdeps/Perlbal/Plugin/Highpri.pm | 125 -- lib/mogdeps/Perlbal/Plugin/Include.pm | 90 - lib/mogdeps/Perlbal/Plugin/LazyCDN.pm | 103 - .../Perlbal/Plugin/MaxContentLength.pm | 91 - lib/mogdeps/Perlbal/Plugin/NotModified.pm | 84 - lib/mogdeps/Perlbal/Plugin/Palimg.pm | 368 ---- lib/mogdeps/Perlbal/Plugin/Queues.pm | 55 - lib/mogdeps/Perlbal/Plugin/Redirect.pm | 130 -- 
lib/mogdeps/Perlbal/Plugin/Stats.pm | 167 -- lib/mogdeps/Perlbal/Plugin/Vhosts.pm | 179 -- lib/mogdeps/Perlbal/Plugin/Vpaths.pm | 105 - lib/mogdeps/Perlbal/Pool.pm | 301 --- lib/mogdeps/Perlbal/ReproxyManager.pm | 255 --- lib/mogdeps/Perlbal/Service.pm | 1686 ----------------- lib/mogdeps/Perlbal/Socket.pm | 390 ---- lib/mogdeps/Perlbal/SocketSSL.pm | 135 -- lib/mogdeps/Perlbal/TCPListener.pm | 193 -- lib/mogdeps/Perlbal/Test.pm | 401 ---- lib/mogdeps/Perlbal/Test/WebClient.pm | 200 -- lib/mogdeps/Perlbal/Test/WebServer.pm | 264 --- lib/mogdeps/Perlbal/UploadListener.pm | 107 -- lib/mogdeps/Perlbal/Util.pm | 53 - lib/mogdeps/Sys/Syscall.pm | 335 ---- mogstored | 19 - 49 files changed, 15004 deletions(-) delete mode 100644 lib/mogdeps/Danga/Socket.pm delete mode 100644 lib/mogdeps/Net/Netmask.pm delete mode 100644 lib/mogdeps/Perlbal.pm delete mode 100644 lib/mogdeps/Perlbal/AIO.pm delete mode 100644 lib/mogdeps/Perlbal/BackendHTTP.pm delete mode 100644 lib/mogdeps/Perlbal/Cache.pm delete mode 100644 lib/mogdeps/Perlbal/ChunkedUploadState.pm delete mode 100644 lib/mogdeps/Perlbal/ClientHTTP.pm delete mode 100644 lib/mogdeps/Perlbal/ClientHTTPBase.pm delete mode 100644 lib/mogdeps/Perlbal/ClientManage.pm delete mode 100644 lib/mogdeps/Perlbal/ClientProxy.pm delete mode 100644 lib/mogdeps/Perlbal/CommandContext.pm delete mode 100644 lib/mogdeps/Perlbal/HTTPHeaders.pm delete mode 100644 lib/mogdeps/Perlbal/ManageCommand.pm delete mode 100644 lib/mogdeps/Perlbal/Plugin/AccessControl.pm delete mode 100644 lib/mogdeps/Perlbal/Plugin/AtomInject.pm delete mode 100644 lib/mogdeps/Perlbal/Plugin/AtomStream.pm delete mode 100644 lib/mogdeps/Perlbal/Plugin/AutoRemoveLeadingDir.pm delete mode 100644 lib/mogdeps/Perlbal/Plugin/Cgilike.pm delete mode 100644 lib/mogdeps/Perlbal/Plugin/EchoService.pm delete mode 100644 lib/mogdeps/Perlbal/Plugin/Highpri.pm delete mode 100644 lib/mogdeps/Perlbal/Plugin/Include.pm delete mode 100644 lib/mogdeps/Perlbal/Plugin/LazyCDN.pm delete mode 
100644 lib/mogdeps/Perlbal/Plugin/MaxContentLength.pm delete mode 100644 lib/mogdeps/Perlbal/Plugin/NotModified.pm delete mode 100644 lib/mogdeps/Perlbal/Plugin/Palimg.pm delete mode 100644 lib/mogdeps/Perlbal/Plugin/Queues.pm delete mode 100644 lib/mogdeps/Perlbal/Plugin/Redirect.pm delete mode 100644 lib/mogdeps/Perlbal/Plugin/Stats.pm delete mode 100644 lib/mogdeps/Perlbal/Plugin/Vhosts.pm delete mode 100644 lib/mogdeps/Perlbal/Plugin/Vpaths.pm delete mode 100644 lib/mogdeps/Perlbal/Pool.pm delete mode 100644 lib/mogdeps/Perlbal/ReproxyManager.pm delete mode 100644 lib/mogdeps/Perlbal/Service.pm delete mode 100644 lib/mogdeps/Perlbal/Socket.pm delete mode 100644 lib/mogdeps/Perlbal/SocketSSL.pm delete mode 100644 lib/mogdeps/Perlbal/TCPListener.pm delete mode 100644 lib/mogdeps/Perlbal/Test.pm delete mode 100644 lib/mogdeps/Perlbal/Test/WebClient.pm delete mode 100644 lib/mogdeps/Perlbal/Test/WebServer.pm delete mode 100644 lib/mogdeps/Perlbal/UploadListener.pm delete mode 100644 lib/mogdeps/Perlbal/Util.pm delete mode 100644 lib/mogdeps/Sys/Syscall.pm diff --git a/MANIFEST b/MANIFEST index 84f32ad3..d51b2b60 100644 --- a/MANIFEST +++ b/MANIFEST @@ -6,49 +6,6 @@ doc/fsck-notes.txt doc/memcache-support.txt doc/pluggable-replication-policies.txt doc/testing.txt -lib/mogdeps/Danga/Socket.pm -lib/mogdeps/Net/Netmask.pm -lib/mogdeps/Perlbal.pm -lib/mogdeps/Perlbal/AIO.pm -lib/mogdeps/Perlbal/BackendHTTP.pm -lib/mogdeps/Perlbal/Cache.pm -lib/mogdeps/Perlbal/ChunkedUploadState.pm -lib/mogdeps/Perlbal/ClientHTTP.pm -lib/mogdeps/Perlbal/ClientHTTPBase.pm -lib/mogdeps/Perlbal/ClientManage.pm -lib/mogdeps/Perlbal/ClientProxy.pm -lib/mogdeps/Perlbal/CommandContext.pm -lib/mogdeps/Perlbal/HTTPHeaders.pm -lib/mogdeps/Perlbal/ManageCommand.pm -lib/mogdeps/Perlbal/Plugin/AccessControl.pm -lib/mogdeps/Perlbal/Plugin/AutoRemoveLeadingDir.pm -lib/mogdeps/Perlbal/Plugin/Highpri.pm -lib/mogdeps/Perlbal/Plugin/NotModified.pm -lib/mogdeps/Perlbal/Plugin/Queues.pm 
-lib/mogdeps/Perlbal/Plugin/Stats.pm -lib/mogdeps/Perlbal/Plugin/Vhosts.pm -lib/mogdeps/Perlbal/Plugin/AtomInject.pm -lib/mogdeps/Perlbal/Plugin/AtomStream.pm -lib/mogdeps/Perlbal/Plugin/Cgilike.pm -lib/mogdeps/Perlbal/Plugin/EchoService.pm -lib/mogdeps/Perlbal/Plugin/Include.pm -lib/mogdeps/Perlbal/Plugin/LazyCDN.pm -lib/mogdeps/Perlbal/Plugin/MaxContentLength.pm -lib/mogdeps/Perlbal/Plugin/Palimg.pm -lib/mogdeps/Perlbal/Plugin/Redirect.pm -lib/mogdeps/Perlbal/Plugin/Vpaths.pm -lib/mogdeps/Perlbal/Pool.pm -lib/mogdeps/Perlbal/ReproxyManager.pm -lib/mogdeps/Perlbal/Service.pm -lib/mogdeps/Perlbal/Socket.pm -lib/mogdeps/Perlbal/TCPListener.pm -lib/mogdeps/Perlbal/Test.pm -lib/mogdeps/Perlbal/Test/WebClient.pm -lib/mogdeps/Perlbal/Test/WebServer.pm -lib/mogdeps/Perlbal/UploadListener.pm -lib/mogdeps/Perlbal/Util.pm -lib/mogdeps/Perlbal/SocketSSL.pm -lib/mogdeps/Sys/Syscall.pm lib/MogileFS/Class.pm lib/MogileFS/Config.pm lib/MogileFS/Connection/Client.pm diff --git a/Makefile.PL b/Makefile.PL index b8739d66..8a19f011 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -36,7 +36,6 @@ WriteMakefile( }, META_MERGE => { no_index => { - directory => 'lib/mogdeps', package => ['ProcessHandle', 'TrackerHandle', 'MogstoredHandle', 'MogPath', 'Mgd'], }, diff --git a/MogileFS-Server.spec b/MogileFS-Server.spec index 40eea05c..142e1b7a 100644 --- a/MogileFS-Server.spec +++ b/MogileFS-Server.spec @@ -38,10 +38,6 @@ make pure_install [ -x /usr/lib/rpm/brp-compress ] && /usr/lib/rpm/brp-compress -# remove mogdeps and related files -rm -rf %{buildroot}/%{perl_vendorlib}/mogdeps -rm -f %{buildroot}/usr/share/man/man3/mogdeps::* - # remove special files find %{buildroot} \( \ -name "perllocal.pod" \ diff --git a/debian/rules b/debian/rules index 24232808..41c724e3 100755 --- a/debian/rules +++ b/debian/rules @@ -44,10 +44,6 @@ install-stamp: find $(TMP) -name .packlist -exec rm '{}' \; find $(TMP) -depth -type d -empty -exec rmdir '{}' \; - # destroy all the mogdeps stuff - rm -rf 
$(TMP)/usr/share/perl5/mogdeps - rm -rf $(TMP)/usr/share/man/man3/mogdeps::* - dh_install --sourcedir=$(TMP) --fail-missing touch install-stamp diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index fb9e3c11..4841407f 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -15,25 +15,6 @@ MogileFS::Server - MogileFS (distributed filesystem) server =cut -# based on where we found this file (a pure-perl module), -# add the mogdeps/ subdirectory of that base to our @INC search -# path, where all the misc Mogile dependencies are installed. -BEGIN { - my $libpath; - if (! $ENV{MOGILE_NO_BUILTIN_DEPS} && - ($libpath = $INC{"MogileFS/Server.pm"}) && - $libpath =~ s!MogileFS/Server.pm$!!) - { - my $dep_dir = "${libpath}mogdeps"; - push @INC, $dep_dir; - unless (($ENV{PERL5LIB} || "") =~ /$dep_dir/) { - $ENV{PERL5LIB} = join(":", - split(/:/, $ENV{PERL5LIB} || ""), - $dep_dir); - } - } -} - use IO::Socket; use Symbol; use POSIX; diff --git a/lib/mogdeps/Danga/Socket.pm b/lib/mogdeps/Danga/Socket.pm deleted file mode 100644 index 9bebf542..00000000 --- a/lib/mogdeps/Danga/Socket.pm +++ /dev/null @@ -1,1437 +0,0 @@ -########################################################################### - -=head1 NAME - -Danga::Socket - Event loop and event-driven async socket base class - -=head1 SYNOPSIS - - package My::Socket - use Danga::Socket; - use base ('Danga::Socket'); - use fields ('my_attribute'); - - sub new { - my My::Socket $self = shift; - $self = fields::new($self) unless ref $self; - $self->SUPER::new( @_ ); - - $self->{my_attribute} = 1234; - return $self; - } - - sub event_err { ... } - sub event_hup { ... } - sub event_write { ... } - sub event_read { ... } - sub close { ... } - - $my_sock->tcp_cork($bool); - - # write returns 1 if all writes have gone through, or 0 if there - # are writes in queue - $my_sock->write($scalar); - $my_sock->write($scalarref); - $my_sock->write(sub { ... 
}); # run when previous data written - $my_sock->write(undef); # kick-starts - - # read max $bytecount bytes, or undef on connection closed - $scalar_ref = $my_sock->read($bytecount); - - # watch for writability. not needed with ->write(). write() - # will automatically turn on watch_write when you wrote too much - # and turn it off when done - $my_sock->watch_write($bool); - - # watch for readability - $my_sock->watch_read($bool); - - # if you read too much and want to push some back on - # readable queue. (not incredibly well-tested) - $my_sock->push_back_read($buf); # scalar or scalar ref - - Danga::Socket->AddOtherFds(..); - Danga::Socket->SetLoopTimeout($millisecs); - Danga::Socket->DescriptorMap(); - Danga::Socket->WatchedSockets(); # count of DescriptorMap keys - Danga::Socket->SetPostLoopCallback($code); - Danga::Socket->EventLoop(); - -=head1 DESCRIPTION - -This is an abstract base class for objects backed by a socket which -provides the basic framework for event-driven asynchronous IO, -designed to be fast. Danga::Socket is both a base class for objects, -and an event loop. - -Callers subclass Danga::Socket. Danga::Socket's constructor registers -itself with the Danga::Socket event loop, and invokes callbacks on the -object for readability, writability, errors, and other conditions. - -Because Danga::Socket uses the "fields" module, your subclasses must -too. - -=head1 MORE INFO - -For now, see servers using Danga::Socket for guidance. For example: -perlbal, mogilefsd, or ddlockd. - -=head1 API - -Note where "C" is used below, normally you would call these methods as: - - Danga::Socket->method(...); - -However using a subclass works too. - -The CLASS methods are all methods for the event loop part of Danga::Socket, -whereas the object methods are all used on your subclasses. 
- -=cut - -########################################################################### - -package Danga::Socket; -use strict; -use bytes; -use POSIX (); -use Time::HiRes (); - -my $opt_bsd_resource = eval "use BSD::Resource; 1;"; - -use vars qw{$VERSION}; -$VERSION = "1.61"; - -use warnings; -no warnings qw(deprecated); - -use Sys::Syscall qw(:epoll); - -use fields ('sock', # underlying socket - 'fd', # numeric file descriptor - 'write_buf', # arrayref of scalars, scalarrefs, or coderefs to write - 'write_buf_offset', # offset into first array of write_buf to start writing at - 'write_buf_size', # total length of data in all write_buf items - 'write_set_watch', # bool: true if we internally set watch_write rather than by a subclass - 'read_push_back', # arrayref of "pushed-back" read data the application didn't want - 'closed', # bool: socket is closed - 'corked', # bool: socket is corked - 'event_watch', # bitmask of events the client is interested in (POLLIN,OUT,etc.) - 'peer_v6', # bool: cached; if peer is an IPv6 address - 'peer_ip', # cached stringified IP address of $sock - 'peer_port', # cached port number of $sock - 'local_ip', # cached stringified IP address of local end of $sock - 'local_port', # cached port number of local end of $sock - 'writer_func', # subref which does writing. must return bytes written (or undef) and set $! on errors - ); - -use Errno qw(EINPROGRESS EWOULDBLOCK EISCONN ENOTSOCK - EPIPE EAGAIN EBADF ECONNRESET ENOPROTOOPT); -use Socket qw(IPPROTO_TCP); -use Carp qw(croak confess); - -use constant TCP_CORK => ($^O eq "linux" ? 3 : 0); # FIXME: not hard-coded (Linux-specific too) -use constant DebugLevel => 0; - -use constant POLLIN => 1; -use constant POLLOUT => 4; -use constant POLLERR => 8; -use constant POLLHUP => 16; -use constant POLLNVAL => 32; - -our $HAVE_KQUEUE = eval { require IO::KQueue; 1 }; - -our ( - $HaveEpoll, # Flag -- is epoll available? initially undefined. 
- $HaveKQueue, - %DescriptorMap, # fd (num) -> Danga::Socket object - %PushBackSet, # fd (num) -> Danga::Socket (fds with pushed back read data) - $Epoll, # Global epoll fd (for epoll mode only) - $KQueue, # Global kqueue fd (for kqueue mode only) - @ToClose, # sockets to close when event loop is done - %OtherFds, # A hash of "other" (non-Danga::Socket) file - # descriptors for the event loop to track. - - $PostLoopCallback, # subref to call at the end of each loop, if defined (global) - %PLCMap, # fd (num) -> PostLoopCallback (per-object) - - $LoopTimeout, # timeout of event loop in milliseconds - $DoProfile, # if on, enable profiling - %Profiling, # what => [ utime, stime, calls ] - $DoneInit, # if we've done the one-time module init yet - @Timers, # timers - ); - -Reset(); - -##################################################################### -### C L A S S M E T H O D S -##################################################################### - -=head2 C<< CLASS->Reset() >> - -Reset all state - -=cut -sub Reset { - %DescriptorMap = (); - %PushBackSet = (); - @ToClose = (); - %OtherFds = (); - $LoopTimeout = -1; # no timeout by default - $DoProfile = 0; - %Profiling = (); - @Timers = (); - - $PostLoopCallback = undef; - %PLCMap = (); - $DoneInit = 0; - - POSIX::close($Epoll) if defined $Epoll && $Epoll >= 0; - POSIX::close($KQueue) if defined $KQueue && $KQueue >= 0; - - *EventLoop = *FirstTimeEventLoop; -} - -=head2 C<< CLASS->HaveEpoll() >> - -Returns a true value if this class will use IO::Epoll for async IO. - -=cut -sub HaveEpoll { - _InitPoller(); - return $HaveEpoll; -} - -=head2 C<< CLASS->WatchedSockets() >> - -Returns the number of file descriptors which are registered with the global -poll object. - -=cut -sub WatchedSockets { - return scalar keys %DescriptorMap; -} -*watched_sockets = *WatchedSockets; - -=head2 C<< CLASS->EnableProfiling() >> - -Turns profiling on, clearing current profiling data. 
- -=cut -sub EnableProfiling { - if ($opt_bsd_resource) { - %Profiling = (); - $DoProfile = 1; - return 1; - } - return 0; -} - -=head2 C<< CLASS->DisableProfiling() >> - -Turns off profiling, but retains data up to this point - -=cut -sub DisableProfiling { - $DoProfile = 0; -} - -=head2 C<< CLASS->ProfilingData() >> - -Returns reference to a hash of data in format: - - ITEM => [ utime, stime, #calls ] - -=cut -sub ProfilingData { - return \%Profiling; -} - -=head2 C<< CLASS->ToClose() >> - -Return the list of sockets that are awaiting close() at the end of the -current event loop. - -=cut -sub ToClose { return @ToClose; } - -=head2 C<< CLASS->OtherFds( [%fdmap] ) >> - -Get/set the hash of file descriptors that need processing in parallel with -the registered Danga::Socket objects. - -=cut -sub OtherFds { - my $class = shift; - if ( @_ ) { %OtherFds = @_ } - return wantarray ? %OtherFds : \%OtherFds; -} - -=head2 C<< CLASS->AddOtherFds( [%fdmap] ) >> - -Add fds to the OtherFds hash for processing. - -=cut -sub AddOtherFds { - my $class = shift; - %OtherFds = ( %OtherFds, @_ ); # FIXME investigate what happens on dupe fds - return wantarray ? %OtherFds : \%OtherFds; -} - -=head2 C<< CLASS->SetLoopTimeout( $timeout ) >> - -Set the loop timeout for the event loop to some value in milliseconds. - -A timeout of 0 (zero) means poll forever. A timeout of -1 means poll and return -immediately. - -=cut -sub SetLoopTimeout { - return $LoopTimeout = $_[1] + 0; -} - -=head2 C<< CLASS->DebugMsg( $format, @args ) >> - -Print the debugging message specified by the C-style I and -I - -=cut -sub DebugMsg { - my ( $class, $fmt, @args ) = @_; - chomp $fmt; - printf STDERR ">>> $fmt\n", @args; -} - -=head2 C<< CLASS->AddTimer( $seconds, $coderef ) >> - -Add a timer to occur $seconds from now. $seconds may be fractional, but timers -are not guaranteed to fire at the exact time you ask for. - -Returns a timer object which you can call C<< $timer->cancel >> on if you need to. 
- -=cut -sub AddTimer { - my $class = shift; - my ($secs, $coderef) = @_; - - my $fire_time = Time::HiRes::time() + $secs; - - my $timer = bless [$fire_time, $coderef], "Danga::Socket::Timer"; - - if (!@Timers || $fire_time >= $Timers[-1][0]) { - push @Timers, $timer; - return $timer; - } - - # Now, where do we insert? (NOTE: this appears slow, algorithm-wise, - # but it was compared against calendar queues, heaps, naive push/sort, - # and a bunch of other versions, and found to be fastest with a large - # variety of datasets.) - for (my $i = 0; $i < @Timers; $i++) { - if ($Timers[$i][0] > $fire_time) { - splice(@Timers, $i, 0, $timer); - return $timer; - } - } - - die "Shouldn't get here."; -} - -=head2 C<< CLASS->DescriptorMap() >> - -Get the hash of Danga::Socket objects keyed by the file descriptor (fileno) they -are wrapping. - -Returns a hash in list context or a hashref in scalar context. - -=cut -sub DescriptorMap { - return wantarray ? %DescriptorMap : \%DescriptorMap; -} -*descriptor_map = *DescriptorMap; -*get_sock_ref = *DescriptorMap; - -sub _InitPoller -{ - return if $DoneInit; - $DoneInit = 1; - - if ($HAVE_KQUEUE) { - $KQueue = IO::KQueue->new(); - $HaveKQueue = $KQueue >= 0; - if ($HaveKQueue) { - *EventLoop = *KQueueEventLoop; - } - } - elsif (Sys::Syscall::epoll_defined()) { - $Epoll = eval { epoll_create(1024); }; - $HaveEpoll = defined $Epoll && $Epoll >= 0; - if ($HaveEpoll) { - *EventLoop = *EpollEventLoop; - } - } - - if (!$HaveEpoll && !$HaveKQueue) { - require IO::Poll; - *EventLoop = *PollEventLoop; - } -} - -=head2 C<< CLASS->EventLoop() >> - -Start processing IO events. In most daemon programs this never exits. See -C below for how to exit the loop. 
- -=cut -sub FirstTimeEventLoop { - my $class = shift; - - _InitPoller(); - - if ($HaveEpoll) { - EpollEventLoop($class); - } elsif ($HaveKQueue) { - KQueueEventLoop($class); - } else { - PollEventLoop($class); - } -} - -## profiling-related data/functions -our ($Prof_utime0, $Prof_stime0); -sub _pre_profile { - ($Prof_utime0, $Prof_stime0) = getrusage(); -} - -sub _post_profile { - # get post information - my ($autime, $astime) = getrusage(); - - # calculate differences - my $utime = $autime - $Prof_utime0; - my $stime = $astime - $Prof_stime0; - - foreach my $k (@_) { - $Profiling{$k} ||= [ 0.0, 0.0, 0 ]; - $Profiling{$k}->[0] += $utime; - $Profiling{$k}->[1] += $stime; - $Profiling{$k}->[2]++; - } -} - -# runs timers and returns milliseconds for next one, or next event loop -sub RunTimers { - return $LoopTimeout unless @Timers; - - my $now = Time::HiRes::time(); - - # Run expired timers - while (@Timers && $Timers[0][0] <= $now) { - my $to_run = shift(@Timers); - $to_run->[1]->($now) if $to_run->[1]; - } - - return $LoopTimeout unless @Timers; - - # convert time to an even number of milliseconds, adding 1 - # extra, otherwise floating point fun can occur and we'll - # call RunTimers like 20-30 times, each returning a timeout - # of 0.0000212 seconds - my $timeout = int(($Timers[0][0] - $now) * 1000) + 1; - - # -1 is an infinite timeout, so prefer a real timeout - return $timeout if $LoopTimeout == -1; - - # otherwise pick the lower of our regular timeout and time until - # the next timer - return $LoopTimeout if $LoopTimeout < $timeout; - return $timeout; -} - -### The epoll-based event loop. Gets installed as EventLoop if IO::Epoll loads -### okay. -sub EpollEventLoop { - my $class = shift; - - foreach my $fd ( keys %OtherFds ) { - if (epoll_ctl($Epoll, EPOLL_CTL_ADD, $fd, EPOLLIN) == -1) { - warn "epoll_ctl(): failure adding fd=$fd; $! 
(", $!+0, ")\n"; - } - } - - while (1) { - my @events; - my $i; - my $timeout = RunTimers(); - - # get up to 1000 events - my $evcount = epoll_wait($Epoll, 1000, $timeout, \@events); - EVENT: - for ($i=0; $i<$evcount; $i++) { - my $ev = $events[$i]; - - # it's possible epoll_wait returned many events, including some at the end - # that ones in the front triggered unregister-interest actions. if we - # can't find the %sock entry, it's because we're no longer interested - # in that event. - my Danga::Socket $pob = $DescriptorMap{$ev->[0]}; - my $code; - my $state = $ev->[1]; - - # if we didn't find a Perlbal::Socket subclass for that fd, try other - # pseudo-registered (above) fds. - if (! $pob) { - if (my $code = $OtherFds{$ev->[0]}) { - $code->($state); - } else { - my $fd = $ev->[0]; - warn "epoll() returned fd $fd w/ state $state for which we have no mapping. removing.\n"; - POSIX::close($fd); - epoll_ctl($Epoll, EPOLL_CTL_DEL, $fd, 0); - } - next; - } - - DebugLevel >= 1 && $class->DebugMsg("Event: fd=%d (%s), state=%d \@ %s\n", - $ev->[0], ref($pob), $ev->[1], time); - - if ($DoProfile) { - my $class = ref $pob; - - # call profiling action on things that need to be done - if ($state & EPOLLIN && ! $pob->{closed}) { - _pre_profile(); - $pob->event_read; - _post_profile("$class-read"); - } - - if ($state & EPOLLOUT && ! $pob->{closed}) { - _pre_profile(); - $pob->event_write; - _post_profile("$class-write"); - } - - if ($state & (EPOLLERR|EPOLLHUP)) { - if ($state & EPOLLERR && ! $pob->{closed}) { - _pre_profile(); - $pob->event_err; - _post_profile("$class-err"); - } - if ($state & EPOLLHUP && ! $pob->{closed}) { - _pre_profile(); - $pob->event_hup; - _post_profile("$class-hup"); - } - } - - next; - } - - # standard non-profiling codepat - $pob->event_read if $state & EPOLLIN && ! $pob->{closed}; - $pob->event_write if $state & EPOLLOUT && ! $pob->{closed}; - if ($state & (EPOLLERR|EPOLLHUP)) { - $pob->event_err if $state & EPOLLERR && ! 
$pob->{closed}; - $pob->event_hup if $state & EPOLLHUP && ! $pob->{closed}; - } - } - return unless PostEventLoop(); - } - exit 0; -} - -### The fallback IO::Poll-based event loop. Gets installed as EventLoop if -### IO::Epoll fails to load. -sub PollEventLoop { - my $class = shift; - - my Danga::Socket $pob; - - while (1) { - my $timeout = RunTimers(); - - # the following sets up @poll as a series of ($poll,$event_mask) - # items, then uses IO::Poll::_poll, implemented in XS, which - # modifies the array in place with the even elements being - # replaced with the event masks that occured. - my @poll; - foreach my $fd ( keys %OtherFds ) { - push @poll, $fd, POLLIN; - } - while ( my ($fd, $sock) = each %DescriptorMap ) { - push @poll, $fd, $sock->{event_watch}; - } - - # if nothing to poll, either end immediately (if no timeout) - # or just keep calling the callback - unless (@poll) { - select undef, undef, undef, ($timeout / 1000); - return unless PostEventLoop(); - next; - } - - my $count = IO::Poll::_poll($timeout, @poll); - unless ($count) { - return unless PostEventLoop(); - next; - } - - # Fetch handles with read events - while (@poll) { - my ($fd, $state) = splice(@poll, 0, 2); - next unless $state; - - $pob = $DescriptorMap{$fd}; - - if (!$pob) { - if (my $code = $OtherFds{$fd}) { - $code->($state); - } - next; - } - - $pob->event_read if $state & POLLIN && ! $pob->{closed}; - $pob->event_write if $state & POLLOUT && ! $pob->{closed}; - $pob->event_err if $state & POLLERR && ! $pob->{closed}; - $pob->event_hup if $state & POLLHUP && ! $pob->{closed}; - } - - return unless PostEventLoop(); - } - - exit 0; -} - -### The kqueue-based event loop. Gets installed as EventLoop if IO::KQueue works -### okay. 
-sub KQueueEventLoop { - my $class = shift; - - foreach my $fd (keys %OtherFds) { - $KQueue->EV_SET($fd, IO::KQueue::EVFILT_READ(), IO::KQueue::EV_ADD()); - } - - while (1) { - my $timeout = RunTimers(); - my @ret = $KQueue->kevent($timeout); - - foreach my $kev (@ret) { - my ($fd, $filter, $flags, $fflags) = @$kev; - my Danga::Socket $pob = $DescriptorMap{$fd}; - if (!$pob) { - if (my $code = $OtherFds{$fd}) { - $code->($filter); - } else { - warn "kevent() returned fd $fd for which we have no mapping. removing.\n"; - POSIX::close($fd); # close deletes the kevent entry - } - next; - } - - DebugLevel >= 1 && $class->DebugMsg("Event: fd=%d (%s), flags=%d \@ %s\n", - $fd, ref($pob), $flags, time); - - $pob->event_read if $filter == IO::KQueue::EVFILT_READ() && !$pob->{closed}; - $pob->event_write if $filter == IO::KQueue::EVFILT_WRITE() && !$pob->{closed}; - if ($flags == IO::KQueue::EV_EOF() && !$pob->{closed}) { - if ($fflags) { - $pob->event_err; - } else { - $pob->event_hup; - } - } - } - return unless PostEventLoop(); - } - - exit(0); -} - -=head2 C<< CLASS->SetPostLoopCallback( CODEREF ) >> - -Sets post loop callback function. Pass a subref and it will be -called every time the event loop finishes. - -Return 1 (or any true value) from the sub to make the loop continue, 0 or false -and it will exit. - -The callback function will be passed two parameters: \%DescriptorMap, \%OtherFds. - -=cut -sub SetPostLoopCallback { - my ($class, $ref) = @_; - - if (ref $class) { - # per-object callback - my Danga::Socket $self = $class; - if (defined $ref && ref $ref eq 'CODE') { - $PLCMap{$self->{fd}} = $ref; - } else { - delete $PLCMap{$self->{fd}}; - } - } else { - # global callback - $PostLoopCallback = (defined $ref && ref $ref eq 'CODE') ? $ref : undef; - } -} - -# Internal function: run the post-event callback, send read events -# for pushed-back data, and close pending connections. returns 1 -# if event loop should continue, or 0 to shut it all down. 
-sub PostEventLoop { - # fire read events for objects with pushed-back read data - my $loop = 1; - while ($loop) { - $loop = 0; - foreach my $fd (keys %PushBackSet) { - my Danga::Socket $pob = $PushBackSet{$fd}; - - # a previous event_read invocation could've closed a - # connection that we already evaluated in "keys - # %PushBackSet", so skip ones that seem to have - # disappeared. this is expected. - next unless $pob; - - die "ASSERT: the $pob socket has no read_push_back" unless @{$pob->{read_push_back}}; - next unless (! $pob->{closed} && - $pob->{event_watch} & POLLIN); - $loop = 1; - $pob->event_read; - } - } - - # now we can close sockets that wanted to close during our event processing. - # (we didn't want to close them during the loop, as we didn't want fd numbers - # being reused and confused during the event loop) - while (my $sock = shift @ToClose) { - my $fd = fileno($sock); - - # close the socket. (not a Danga::Socket close) - $sock->close; - - # and now we can finally remove the fd from the map. see - # comment above in _cleanup. - delete $DescriptorMap{$fd}; - } - - - # by default we keep running, unless a postloop callback (either per-object - # or global) cancels it - my $keep_running = 1; - - # per-object post-loop-callbacks - for my $plc (values %PLCMap) { - $keep_running &&= $plc->(\%DescriptorMap, \%OtherFds); - } - - # now we're at the very end, call callback if defined - if (defined $PostLoopCallback) { - $keep_running &&= $PostLoopCallback->(\%DescriptorMap, \%OtherFds); - } - - return $keep_running; -} - -##################################################################### -### Danga::Socket-the-object code -##################################################################### - -=head2 OBJECT METHODS - -=head2 C<< CLASS->new( $socket ) >> - -Create a new Danga::Socket subclass object for the given I which will -react to events on it during the C. - -This is normally (always?) 
called from your subclass via: - - $class->SUPER::new($socket); - -=cut -sub new { - my Danga::Socket $self = shift; - $self = fields::new($self) unless ref $self; - - my $sock = shift; - - $self->{sock} = $sock; - my $fd = fileno($sock); - - Carp::cluck("undef sock and/or fd in Danga::Socket->new. sock=" . ($sock || "") . ", fd=" . ($fd || "")) - unless $sock && $fd; - - $self->{fd} = $fd; - $self->{write_buf} = []; - $self->{write_buf_offset} = 0; - $self->{write_buf_size} = 0; - $self->{closed} = 0; - $self->{corked} = 0; - $self->{read_push_back} = []; - - $self->{event_watch} = POLLERR|POLLHUP|POLLNVAL; - - _InitPoller(); - - if ($HaveEpoll) { - epoll_ctl($Epoll, EPOLL_CTL_ADD, $fd, $self->{event_watch}) - and die "couldn't add epoll watch for $fd\n"; - } - elsif ($HaveKQueue) { - # Add them to the queue but disabled for now - $KQueue->EV_SET($fd, IO::KQueue::EVFILT_READ(), - IO::KQueue::EV_ADD() | IO::KQueue::EV_DISABLE()); - $KQueue->EV_SET($fd, IO::KQueue::EVFILT_WRITE(), - IO::KQueue::EV_ADD() | IO::KQueue::EV_DISABLE()); - } - - Carp::cluck("Danga::Socket::new blowing away existing descriptor map for fd=$fd ($DescriptorMap{$fd})") - if $DescriptorMap{$fd}; - - $DescriptorMap{$fd} = $self; - return $self; -} - - -##################################################################### -### I N S T A N C E M E T H O D S -##################################################################### - -=head2 C<< $obj->tcp_cork( $boolean ) >> - -Turn TCP_CORK on or off depending on the value of I. - -=cut -sub tcp_cork { - my Danga::Socket $self = $_[0]; - my $val = $_[1]; - - # make sure we have a socket - return unless $self->{sock}; - return if $val == $self->{corked}; - - my $rv; - if (TCP_CORK) { - $rv = setsockopt($self->{sock}, IPPROTO_TCP, TCP_CORK, - pack("l", $val ? 
1 : 0)); - } else { - # FIXME: implement freebsd *PUSH sockopts - $rv = 1; - } - - # if we failed, close (if we're not already) and warn about the error - if ($rv) { - $self->{corked} = $val; - } else { - if ($! == EBADF || $! == ENOTSOCK) { - # internal state is probably corrupted; warn and then close if - # we're not closed already - warn "setsockopt: $!"; - $self->close('tcp_cork_failed'); - } elsif ($! == ENOPROTOOPT || $!{ENOTSOCK} || $!{EOPNOTSUPP}) { - # TCP implementation doesn't support corking, so just ignore it - # or we're trying to tcp-cork a non-socket (like a socketpair pipe - # which is acting like a socket, which Perlbal does for child - # processes acting like inetd-like web servers) - } else { - # some other error; we should never hit here, but if we do, die - die "setsockopt: $!"; - } - } -} - -=head2 C<< $obj->steal_socket() >> - -Basically returns our socket and makes it so that we don't try to close it, -but we do remove it from epoll handlers. THIS CLOSES $self. It is the same -thing as calling close, except it gives you the socket to use. - -=cut -sub steal_socket { - my Danga::Socket $self = $_[0]; - return if $self->{closed}; - - # cleanup does most of the work of closing this socket - $self->_cleanup(); - - # now undef our internal sock and fd structures so we don't use them - my $sock = $self->{sock}; - $self->{sock} = undef; - return $sock; -} - -=head2 C<< $obj->close( [$reason] ) >> - -Close the socket. The I argument will be used in debugging messages. - -=cut -sub close { - my Danga::Socket $self = $_[0]; - return if $self->{closed}; - - # print out debugging info for this close - if (DebugLevel) { - my ($pkg, $filename, $line) = caller; - my $reason = $_[1] || ""; - warn "Closing \#$self->{fd} due to $pkg/$filename/$line ($reason)\n"; - } - - # this does most of the work of closing us - $self->_cleanup(); - - # defer closing the actual socket until the event loop is done - # processing this round of events. 
(otherwise we might reuse fds) - if ($self->{sock}) { - push @ToClose, $self->{sock}; - $self->{sock} = undef; - } - - return 0; -} - -### METHOD: _cleanup() -### Called by our closers so we can clean internal data structures. -sub _cleanup { - my Danga::Socket $self = $_[0]; - - # we're effectively closed; we have no fd and sock when we leave here - $self->{closed} = 1; - - # we need to flush our write buffer, as there may - # be self-referential closures (sub { $client->close }) - # preventing the object from being destroyed - $self->{write_buf} = []; - - # uncork so any final data gets sent. only matters if the person closing - # us forgot to do it, but we do it to be safe. - $self->tcp_cork(0); - - # if we're using epoll, we have to remove this from our epoll fd so we stop getting - # notifications about it - if ($HaveEpoll && $self->{fd}) { - if (epoll_ctl($Epoll, EPOLL_CTL_DEL, $self->{fd}, $self->{event_watch}) != 0) { - # dump_error prints a backtrace so we can try to figure out why this happened - $self->dump_error("epoll_ctl(): failure deleting fd=$self->{fd} during _cleanup(); $! (" . ($!+0) . ")"); - } - } - - # now delete from mappings. this fd no longer belongs to us, so we don't want - # to get alerts for it if it becomes writable/readable/etc. - delete $PushBackSet{$self->{fd}}; - delete $PLCMap{$self->{fd}}; - - # we explicitly don't delete from DescriptorMap here until we - # actually close the socket, as we might be in the middle of - # processing an epoll_wait/etc that returned hundreds of fds, one - # of which is not yet processed and is what we're closing. if we - # keep it in DescriptorMap, then the event harnesses can just - # looked at $pob->{closed} and ignore it. but if it's an - # un-accounted for fd, then it (understandably) freak out a bit - # and emit warnings, thinking their state got off. 
- - # and finally get rid of our fd so we can't use it anywhere else - $self->{fd} = undef; -} - -=head2 C<< $obj->sock() >> - -Returns the underlying IO::Handle for the object. - -=cut -sub sock { - my Danga::Socket $self = shift; - return $self->{sock}; -} - -=head2 C<< $obj->set_writer_func( CODEREF ) >> - -Sets a function to use instead of C when writing data to the socket. - -=cut -sub set_writer_func { - my Danga::Socket $self = shift; - my $wtr = shift; - Carp::croak("Not a subref") unless !defined $wtr || UNIVERSAL::isa($wtr, "CODE"); - $self->{writer_func} = $wtr; -} - -=head2 C<< $obj->write( $data ) >> - -Write the specified data to the underlying handle. I may be scalar, -scalar ref, code ref (to run when there), or undef just to kick-start. -Returns 1 if writes all went through, or 0 if there are writes in queue. If -it returns 1, caller should stop waiting for 'writable' events) - -=cut -sub write { - my Danga::Socket $self; - my $data; - ($self, $data) = @_; - - # nobody should be writing to closed sockets, but caller code can - # do two writes within an event, have the first fail and - # disconnect the other side (whose destructor then closes the - # calling object, but it's still in a method), and then the - # now-dead object does its second write. that is this case. we - # just lie and say it worked. it'll be dead soon and won't be - # hurt by this lie. - return 1 if $self->{closed}; - - my $bref; - - # just queue data if there's already a wait - my $need_queue; - - if (defined $data) { - $bref = ref $data ? $data : \$data; - if ($self->{write_buf_size}) { - push @{$self->{write_buf}}, $bref; - $self->{write_buf_size} += ref $bref eq "SCALAR" ? length($$bref) : 1; - return 0; - } - - # this flag says we're bypassing the queue system, knowing we're the - # only outstanding write, and hoping we don't ever need to use it. 
- # if so later, though, we'll need to queue - $need_queue = 1; - } - - WRITE: - while (1) { - return 1 unless $bref ||= $self->{write_buf}[0]; - - my $len; - eval { - $len = length($$bref); # this will die if $bref is a code ref, caught below - }; - if ($@) { - if (UNIVERSAL::isa($bref, "CODE")) { - unless ($need_queue) { - $self->{write_buf_size}--; # code refs are worth 1 - shift @{$self->{write_buf}}; - } - $bref->(); - - # code refs are just run and never get reenqueued - # (they're one-shot), so turn off the flag indicating the - # outstanding data needs queueing. - $need_queue = 0; - - undef $bref; - next WRITE; - } - die "Write error: $@ <$bref>"; - } - - my $to_write = $len - $self->{write_buf_offset}; - my $written; - if (my $wtr = $self->{writer_func}) { - $written = $wtr->($bref, $to_write, $self->{write_buf_offset}); - } else { - $written = syswrite($self->{sock}, $$bref, $to_write, $self->{write_buf_offset}); - } - - if (! defined $written) { - if ($! == EPIPE) { - return $self->close("EPIPE"); - } elsif ($! == EAGAIN) { - # since connection has stuff to write, it should now be - # interested in pending writes: - if ($need_queue) { - push @{$self->{write_buf}}, $bref; - $self->{write_buf_size} += $len; - } - $self->{write_set_watch} = 1 unless $self->{event_watch} & POLLOUT; - $self->watch_write(1); - return 0; - } elsif ($! 
== ECONNRESET) { - return $self->close("ECONNRESET"); - } - - DebugLevel >= 1 && $self->debugmsg("Closing connection ($self) due to write error: $!\n"); - - return $self->close("write_error"); - } elsif ($written != $to_write) { - DebugLevel >= 2 && $self->debugmsg("Wrote PARTIAL %d bytes to %d", - $written, $self->{fd}); - if ($need_queue) { - push @{$self->{write_buf}}, $bref; - $self->{write_buf_size} += $len; - } - # since connection has stuff to write, it should now be - # interested in pending writes: - $self->{write_buf_offset} += $written; - $self->{write_buf_size} -= $written; - $self->on_incomplete_write; - return 0; - } elsif ($written == $to_write) { - DebugLevel >= 2 && $self->debugmsg("Wrote ALL %d bytes to %d (nq=%d)", - $written, $self->{fd}, $need_queue); - $self->{write_buf_offset} = 0; - - if ($self->{write_set_watch}) { - $self->watch_write(0); - $self->{write_set_watch} = 0; - } - - # this was our only write, so we can return immediately - # since we avoided incrementing the buffer size or - # putting it in the buffer. we also know there - # can't be anything else to write. - return 1 if $need_queue; - - $self->{write_buf_size} -= $written; - shift @{$self->{write_buf}}; - undef $bref; - next WRITE; - } - } -} - -sub on_incomplete_write { - my Danga::Socket $self = shift; - $self->{write_set_watch} = 1 unless $self->{event_watch} & POLLOUT; - $self->watch_write(1); -} - -=head2 C<< $obj->push_back_read( $buf ) >> - -Push back I (a scalar or scalarref) into the read stream. Useful if you read -more than you need to and want to return this data on the next "read". - -=cut -sub push_back_read { - my Danga::Socket $self = shift; - my $buf = shift; - push @{$self->{read_push_back}}, ref $buf ? $buf : \$buf; - $PushBackSet{$self->{fd}} = $self; -} - -=head2 C<< $obj->read( $bytecount ) >> - -Read at most I bytes from the underlying handle; returns scalar -ref on read, or undef on connection closed. 
- -=cut -sub read { - my Danga::Socket $self = shift; - return if $self->{closed}; - my $bytes = shift; - my $buf; - my $sock = $self->{sock}; - - if (@{$self->{read_push_back}}) { - $buf = shift @{$self->{read_push_back}}; - my $len = length($$buf); - - if ($len <= $bytes) { - delete $PushBackSet{$self->{fd}} unless @{$self->{read_push_back}}; - return $buf; - } else { - # if the pushed back read is too big, we have to split it - my $overflow = substr($$buf, $bytes); - $buf = substr($$buf, 0, $bytes); - unshift @{$self->{read_push_back}}, \$overflow; - return \$buf; - } - } - - # if this is too high, perl quits(!!). reports on mailing lists - # don't seem to point to a universal answer. 5MB worked for some, - # crashed for others. 1MB works for more people. let's go with 1MB - # for now. :/ - my $req_bytes = $bytes > 1048576 ? 1048576 : $bytes; - - my $res = sysread($sock, $buf, $req_bytes, 0); - DebugLevel >= 2 && $self->debugmsg("sysread = %d; \$! = %d", $res, $!); - - if (! $res && $! != EWOULDBLOCK) { - # catches 0=conn closed or undef=error - DebugLevel >= 2 && $self->debugmsg("Fd \#%d read hit the end of the road.", $self->{fd}); - return undef; - } - - return \$buf; -} - -=head2 (VIRTUAL) C<< $obj->event_read() >> - -Readable event handler. Concrete deriviatives of Danga::Socket should -provide an implementation of this. The default implementation will die if -called. - -=cut -sub event_read { die "Base class event_read called for $_[0]\n"; } - -=head2 (VIRTUAL) C<< $obj->event_err() >> - -Error event handler. Concrete deriviatives of Danga::Socket should -provide an implementation of this. The default implementation will die if -called. - -=cut -sub event_err { die "Base class event_err called for $_[0]\n"; } - -=head2 (VIRTUAL) C<< $obj->event_hup() >> - -'Hangup' event handler. Concrete deriviatives of Danga::Socket should -provide an implementation of this. The default implementation will die if -called. 
- -=cut -sub event_hup { die "Base class event_hup called for $_[0]\n"; } - -=head2 C<< $obj->event_write() >> - -Writable event handler. Concrete deriviatives of Danga::Socket may wish to -provide an implementation of this. The default implementation calls -C with an C. - -=cut -sub event_write { - my $self = shift; - $self->write(undef); -} - -=head2 C<< $obj->watch_read( $boolean ) >> - -Turn 'readable' event notification on or off. - -=cut -sub watch_read { - my Danga::Socket $self = shift; - return if $self->{closed} || !$self->{sock}; - - my $val = shift; - my $event = $self->{event_watch}; - - $event &= ~POLLIN if ! $val; - $event |= POLLIN if $val; - - # If it changed, set it - if ($event != $self->{event_watch}) { - if ($HaveKQueue) { - $KQueue->EV_SET($self->{fd}, IO::KQueue::EVFILT_READ(), - $val ? IO::KQueue::EV_ENABLE() : IO::KQueue::EV_DISABLE()); - } - elsif ($HaveEpoll) { - epoll_ctl($Epoll, EPOLL_CTL_MOD, $self->{fd}, $event) - and $self->dump_error("couldn't modify epoll settings for $self->{fd} " . - "from $self->{event_watch} -> $event: $! (" . ($!+0) . ")"); - } - $self->{event_watch} = $event; - } -} - -=head2 C<< $obj->watch_write( $boolean ) >> - -Turn 'writable' event notification on or off. - -=cut -sub watch_write { - my Danga::Socket $self = shift; - return if $self->{closed} || !$self->{sock}; - - my $val = shift; - my $event = $self->{event_watch}; - - $event &= ~POLLOUT if ! $val; - $event |= POLLOUT if $val; - - if ($val && caller ne __PACKAGE__) { - # A subclass registered interest, it's now responsible for this. - $self->{write_set_watch} = 0; - } - - # If it changed, set it - if ($event != $self->{event_watch}) { - if ($HaveKQueue) { - $KQueue->EV_SET($self->{fd}, IO::KQueue::EVFILT_WRITE(), - $val ? IO::KQueue::EV_ENABLE() : IO::KQueue::EV_DISABLE()); - } - elsif ($HaveEpoll) { - epoll_ctl($Epoll, EPOLL_CTL_MOD, $self->{fd}, $event) - and $self->dump_error("couldn't modify epoll settings for $self->{fd} " . 
- "from $self->{event_watch} -> $event: $! (" . ($!+0) . ")"); - } - $self->{event_watch} = $event; - } -} - -=head2 C<< $obj->dump_error( $message ) >> - -Prints to STDERR a backtrace with information about this socket and what lead -up to the dump_error call. - -=cut -sub dump_error { - my $i = 0; - my @list; - while (my ($file, $line, $sub) = (caller($i++))[1..3]) { - push @list, "\t$file:$line called $sub\n"; - } - - warn "ERROR: $_[1]\n" . - "\t$_[0] = " . $_[0]->as_string . "\n" . - join('', @list); -} - -=head2 C<< $obj->debugmsg( $format, @args ) >> - -Print the debugging message specified by the C-style I and -I. - -=cut -sub debugmsg { - my ( $self, $fmt, @args ) = @_; - confess "Not an object" unless ref $self; - - chomp $fmt; - printf STDERR ">>> $fmt\n", @args; -} - - -=head2 C<< $obj->peer_ip_string() >> - -Returns the string describing the peer's IP - -=cut -sub peer_ip_string { - my Danga::Socket $self = shift; - return _undef("peer_ip_string undef: no sock") unless $self->{sock}; - return $self->{peer_ip} if defined $self->{peer_ip}; - - my $pn = getpeername($self->{sock}); - return _undef("peer_ip_string undef: getpeername") unless $pn; - - my ($port, $iaddr) = eval { - if (length($pn) >= 28) { - return Socket6::unpack_sockaddr_in6($pn); - } else { - return Socket::sockaddr_in($pn); - } - }; - - if ($@) { - $self->{peer_port} = "[Unknown peerport '$@']"; - return "[Unknown peername '$@']"; - } - - $self->{peer_port} = $port; - - if (length($iaddr) == 4) { - return $self->{peer_ip} = Socket::inet_ntoa($iaddr); - } else { - $self->{peer_v6} = 1; - return $self->{peer_ip} = Socket6::inet_ntop(Socket6::AF_INET6(), - $iaddr); - } -} - -=head2 C<< $obj->peer_addr_string() >> - -Returns the string describing the peer for the socket which underlies this -object in form "ip:port" - -=cut -sub peer_addr_string { - my Danga::Socket $self = shift; - my $ip = $self->peer_ip_string - or return undef; - return $self->{peer_v6} ? 
- "[$ip]:$self->{peer_port}" : - "$ip:$self->{peer_port}"; -} - -=head2 C<< $obj->local_ip_string() >> - -Returns the string describing the local IP - -=cut -sub local_ip_string { - my Danga::Socket $self = shift; - return _undef("local_ip_string undef: no sock") unless $self->{sock}; - return $self->{local_ip} if defined $self->{local_ip}; - - my $pn = getsockname($self->{sock}); - return _undef("local_ip_string undef: getsockname") unless $pn; - - my ($port, $iaddr) = Socket::sockaddr_in($pn); - $self->{local_port} = $port; - - return $self->{local_ip} = Socket::inet_ntoa($iaddr); -} - -=head2 C<< $obj->local_addr_string() >> - -Returns the string describing the local end of the socket which underlies this -object in form "ip:port" - -=cut -sub local_addr_string { - my Danga::Socket $self = shift; - my $ip = $self->local_ip_string; - return $ip ? "$ip:$self->{local_port}" : undef; -} - - -=head2 C<< $obj->as_string() >> - -Returns a string describing this socket. - -=cut -sub as_string { - my Danga::Socket $self = shift; - my $rw = "(" . ($self->{event_watch} & POLLIN ? 'R' : '') . - ($self->{event_watch} & POLLOUT ? 'W' : '') . ")"; - my $ret = ref($self) . "$rw: " . ($self->{closed} ? "closed" : "open"); - my $peer = $self->peer_addr_string; - if ($peer) { - $ret .= " to " . $self->peer_addr_string; - } - return $ret; -} - -sub _undef { - return undef unless $ENV{DS_DEBUG}; - my $msg = shift || ""; - warn "Danga::Socket: $msg\n"; - return undef; -} - -package Danga::Socket::Timer; -# [$abs_float_firetime, $coderef]; -sub cancel { - $_[0][1] = undef; -} - -=head1 AUTHORS - -Brad Fitzpatrick - author - -Michael Granger - docs, testing - -Mark Smith - contributor, heavy user, testing - -Matt Sergeant - kqueue support, docs, timers, other bits - -=head1 BUGS - -Not documented enough (but isn't that true of every project?). - -tcp_cork only works on Linux for now. No BSD push/nopush support. 
- -=head1 LICENSE - -License is granted to use and distribute this module under the same -terms as Perl itself. - -=cut - -1; - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Net/Netmask.pm b/lib/mogdeps/Net/Netmask.pm deleted file mode 100644 index 89908e20..00000000 --- a/lib/mogdeps/Net/Netmask.pm +++ /dev/null @@ -1,583 +0,0 @@ - -package Net::Netmask; - -use vars qw($VERSION); -$VERSION = 1.9011; - -require Exporter; -@ISA = qw(Exporter); -@EXPORT = qw(findNetblock findOuterNetblock findAllNetblock - cidrs2contiglists range2cidrlist sort_by_ip_address - dumpNetworkTable sort_network_blocks cidrs2cidrs - cidrs2inverse); -@EXPORT_OK = (@EXPORT, qw(int2quad quad2int %quadmask2bits - %quadhostmask2bits imask sameblock cmpblocks contains)); - -my $remembered = {}; -my %imask2bits; -my %size2bits; -my @imask; - -# our %quadmask2bits; -# our %quadhostmask2bits; - -use vars qw($error $debug %quadmask2bits %quadhostmask2bits); -$debug = 1; - -use strict; -use warnings; -use Carp; -use overload - '""' => \&desc, - '<=>' => \&cmp_net_netmask_block, - 'cmp' => \&cmp_net_netmask_block, - 'fallback' => 1; - -sub new -{ - my ($package, $net, $mask) = @_; - - $mask = '' unless defined $mask; - - my $base; - my $bits; - my $ibase; - undef $error; - - if ($net =~ m,^(\d+\.\d+\.\d+\.\d+)/(\d+)$,) { - ($base, $bits) = ($1, $2); - } elsif ($net =~ m,^(\d+\.\d+\.\d+\.\d+)[:/](\d+\.\d+\.\d+\.\d+)$,) { - $base = $1; - my $quadmask = $2; - if (exists $quadmask2bits{$quadmask}) { - $bits = $quadmask2bits{$quadmask}; - } else { - $error = "illegal netmask: $quadmask"; - } - } elsif ($net =~ m,^(\d+\.\d+\.\d+\.\d+)[#](\d+\.\d+\.\d+\.\d+)$,) { - $base = $1; - my $hostmask = $2; - if (exists $quadhostmask2bits{$hostmask}) { - $bits = $quadhostmask2bits{$hostmask}; - } else { - $error = "illegal hostmask: $hostmask"; - } - } elsif (($net =~ m,^\d+\.\d+\.\d+\.\d+$,) - && ($mask =~ m,\d+\.\d+\.\d+\.\d+$,)) - { - $base = $net; - 
if (exists $quadmask2bits{$mask}) { - $bits = $quadmask2bits{$mask}; - } else { - $error = "illegal netmask: $mask"; - } - } elsif (($net =~ m,^\d+\.\d+\.\d+\.\d+$,) && - ($mask =~ m,0x[a-z0-9]+,i)) - { - $base = $net; - my $imask = hex($mask); - if (exists $imask2bits{$imask}) { - $bits = $imask2bits{$imask}; - } else { - $error = "illegal netmask: $mask ($imask)"; - } - } elsif ($net =~ /^\d+\.\d+\.\d+\.\d+$/ && ! $mask) { - ($base, $bits) = ($net, 32); - } elsif ($net =~ /^\d+\.\d+\.\d+$/ && ! $mask) { - ($base, $bits) = ("$net.0", 24); - } elsif ($net =~ /^\d+\.\d+$/ && ! $mask) { - ($base, $bits) = ("$net.0.0", 16); - } elsif ($net =~ /^\d+$/ && ! $mask) { - ($base, $bits) = ("$net.0.0.0", 8); - } elsif ($net =~ m,^(\d+\.\d+\.\d+)/(\d+)$,) { - ($base, $bits) = ("$1.0", $2); - } elsif ($net =~ m,^(\d+\.\d+)/(\d+)$,) { - ($base, $bits) = ("$1.0.0", $2); - } elsif ($net eq 'default' || $net eq 'any') { - ($base, $bits) = ("0.0.0.0", 0); - } elsif ($net =~ m,^(\d+\.\d+\.\d+\.\d+)\s*-\s*(\d+\.\d+\.\d+\.\d+)$,) { - # whois format - $ibase = quad2int($1); - my $end = quad2int($2); - $error = "illegal dotted quad: $net" - unless defined($ibase) && defined($end); - my $diff = ($end || 0) - ($ibase || 0) + 1; - $bits = $size2bits{$diff}; - $error = "could not find exact fit for $net" - if ! defined $error && ( - ! defined $bits - || ($ibase & ~$imask[$bits])); - } else { - $error = "could not parse $net"; - $error .= " $mask" if $mask; - } - - carp $error if $error && $debug; - - $ibase = quad2int($base || 0) unless defined $ibase; - unless (defined($ibase) || defined($error)) { - $error = "could not parse $net"; - $error .= " $mask" if $mask; - } - $ibase &= $imask[$bits] - if defined $ibase && defined $bits; - - $bits = 0 unless $bits; - if ($bits > 32) { - $error = "illegal number of bits: $bits" - unless $error; - $bits = 32; - } - - return bless { - 'IBASE' => $ibase, - 'BITS' => $bits, - ( $error ? 
( 'ERROR' => $error ) : () ), - }; -} - -sub new2 -{ - local($debug) = 0; - my $net = new(@_); - return undef if $error; - return $net; -} - -sub errstr { return $error; } -sub debug { my $this = shift; return (@_ ? $debug = shift : $debug) } - -sub base { my ($this) = @_; return int2quad($this->{'IBASE'}); } -sub bits { my ($this) = @_; return $this->{'BITS'}; } -sub size { my ($this) = @_; return 2**(32- $this->{'BITS'}); } -sub next { my ($this) = @_; int2quad($this->{'IBASE'} + $this->size()); } - -sub broadcast -{ - my($this) = @_; - int2quad($this->{'IBASE'} + $this->size() - 1); -} - -*first = \&base; -*last = \&broadcast; - -sub desc -{ - return int2quad($_[0]->{'IBASE'}).'/'.$_[0]->{'BITS'}; -} - -sub imask -{ - return (2**32 -(2** (32- $_[0]))); -} - -sub mask -{ - my ($this) = @_; - - return int2quad ( $imask[$this->{'BITS'}]); -} - -sub hostmask -{ - my ($this) = @_; - - return int2quad ( ~ $imask[$this->{'BITS'}]); -} - -sub nth -{ - my ($this, $index, $bitstep) = @_; - my $size = $this->size(); - my $ibase = $this->{'IBASE'}; - $bitstep = 32 unless $bitstep; - my $increment = 2**(32-$bitstep); - $index *= $increment; - $index += $size if $index < 0; - return undef if $index < 0; - return undef if $index >= $size; - return int2quad($ibase+$index); -} - -sub enumerate -{ - my ($this, $bitstep) = @_; - $bitstep = 32 unless $bitstep; - my $size = $this->size(); - my $increment = 2**(32-$bitstep); - my @ary; - my $ibase = $this->{'IBASE'}; - for (my $i = 0; $i < $size; $i += $increment) { - push(@ary, int2quad($ibase+$i)); - } - return @ary; -} - -sub inaddr -{ - my ($this) = @_; - my $ibase = $this->{'IBASE'}; - my $blocks = int($this->size()/256); - return (join('.',unpack('xC3', pack('V', $ibase))).".in-addr.arpa", - $ibase%256, $ibase%256+$this->size()-1) if $blocks == 0; - my @ary; - for (my $i = 0; $i < $blocks; $i++) { - push(@ary, join('.',unpack('xC3', pack('V', $ibase+$i*256))) - .".in-addr.arpa", 0, 255); - } - return @ary; -} - -sub tag -{ - my 
$this = shift; - my $tag = shift; - my $val = $this->{'T'.$tag}; - $this->{'T'.$tag} = $_[0] if @_; - return $val; -} - -sub quad2int -{ - my @bytes = split(/\./,$_[0]); - - return undef unless @bytes == 4 && ! grep {!(/\d+$/ && $_<256)} @bytes; - - return unpack("N",pack("C4",@bytes)); -} - -sub int2quad -{ - return join('.',unpack('C4', pack("N", $_[0]))); -} - -sub storeNetblock -{ - my ($this, $t) = @_; - $t = $remembered unless $t; - - my $base = $this->{'IBASE'}; - - $t->{$base} = [] unless exists $t->{$base}; - - my $mb = maxblock($this); - my $b = $this->{'BITS'}; - my $i = $b - $mb; - - $t->{$base}->[$i] = $this; -} - -sub deleteNetblock -{ - my ($this, $t) = @_; - $t = $remembered unless $t; - - my $base = $this->{'IBASE'}; - - my $mb = maxblock($this); - my $b = $this->{'BITS'}; - my $i = $b - $mb; - - return unless defined $t->{$base}; - - undef $t->{$base}->[$i]; - - for my $x (@{$t->{$base}}) { - return if $x; - } - delete $t->{$base}; -} - -sub findNetblock -{ - my ($ipquad, $t) = @_; - $t = $remembered unless $t; - - my $ip = quad2int($ipquad); - my %done; - - for (my $b = 32; $b >= 0; $b--) { - my $nb = $ip & $imask[$b]; - next unless exists $t->{$nb}; - my $mb = imaxblock($nb, 32); - next if $done{$mb}++; - my $i = $b - $mb; - confess "$mb, $b, $ipquad, $nb" if ($i < 0 or $i > 32); - while ($i >= 0) { - return $t->{$nb}->[$i] - if defined $t->{$nb}->[$i]; - $i--; - } - } -} - -sub findOuterNetblock -{ - my ($ipquad, $t) = @_; - $t = $remembered unless $t; - - my $ip; - my $mask; - if (ref($ipquad)) { - $ip = $ipquad->{IBASE}; - $mask = $ipquad->{BITS}; - } else { - $ip = quad2int($ipquad); - $mask = 32; - } - - for (my $b = 0; $b <= $mask; $b++) { - my $nb = $ip & $imask[$b];; - next unless exists $t->{$nb}; - my $mb = imaxblock($nb, $mask); - my $i = $b - $mb; - confess "$mb, $b, $ipquad, $nb" if $i < 0; - confess "$mb, $b, $ipquad, $nb" if $i > 32; - while ($i >= 0) { - return $t->{$nb}->[$i] - if defined $t->{$nb}->[$i]; - $i--; - } - } -} - 
-sub findAllNetblock -{ - my ($ipquad, $t) = @_; - $t = $remembered unless $t; - my @ary ; - my $ip = quad2int($ipquad); - my %done; - - for (my $b = 32; $b >= 0; $b--) { - my $nb = $ip & $imask[$b]; - next unless exists $t->{$nb}; - my $mb = imaxblock($nb, 32); - next if $done{$mb}++; - my $i = $b - $mb; - confess "$mb, $b, $ipquad, $nb" if $i < 0; - confess "$mb, $b, $ipquad, $nb" if $i > 32; - while ($i >= 0) { - push(@ary, $t->{$nb}->[$i]) - if defined $t->{$nb}->[$i]; - $i--; - } - } - return @ary; -} - -sub dumpNetworkTable -{ - my ($t) = @_; - $t = $remembered unless $t; - - my @ary; - foreach my $base (keys %$t) { - push(@ary, grep (defined($_), @{$t->{base}})); - for my $x (@{$t->{$base}}) { - push(@ary, $x) - if defined $x; - } - } - return sort @ary; -} - -sub checkNetblock -{ - my ($this, $t) = @_; - $t = $remembered unless $t; - - my $base = $this->{'IBASE'}; - - my $mb = maxblock($this); - my $b = $this->{'BITS'}; - my $i = $b - $mb; - - return defined $t->{$base}->[$i]; -} - -sub match -{ - my ($this, $ip) = @_; - my $i = quad2int($ip); - my $imask = $imask[$this->{BITS}]; - if (($i & $imask) == $this->{IBASE}) { - return (($i & ~ $imask) || "0 "); - } else { - return 0; - } -} - -sub maxblock -{ - my ($this) = @_; - return imaxblock($this->{'IBASE'}, $this->{'BITS'}); -} - -sub imaxblock -{ - my ($ibase, $tbit) = @_; - confess unless defined $ibase; - while ($tbit > 0) { - my $im = $imask[$tbit-1]; - last if (($ibase & $im) != $ibase); - $tbit--; - } - return $tbit; -} - -sub range2cidrlist -{ - my ($startip, $endip) = @_; - - my $start = quad2int($startip); - my $end = quad2int($endip); - - ($start, $end) = ($end, $start) - if $start > $end; - return irange2cidrlist($start, $end); -} - -sub irange2cidrlist -{ - my ($start, $end) = @_; - my @result; - while ($end >= $start) { - my $maxsize = imaxblock($start, 32); - my $maxdiff = 32 - int(log($end - $start + 1)/log(2)); - $maxsize = $maxdiff if $maxsize < $maxdiff; - push (@result, bless { - 'IBASE' 
=> $start, - 'BITS' => $maxsize - }); - $start += 2**(32-$maxsize); - } - return @result; -} - -sub cidrs2contiglists -{ - my (@cidrs) = sort_network_blocks(@_); - my @result; - while (@cidrs) { - my (@r) = shift(@cidrs); - my $max = $r[0]->{IBASE} + $r[0]->size; - while ($cidrs[0] && $cidrs[0]->{IBASE} <= $max) { - my $nm = $cidrs[0]->{IBASE} + $cidrs[0]->size; - $max = $nm if $nm > $max; - push(@r, shift(@cidrs)); - } - push(@result, [@r]); - } - return @result; -} - -sub cidrs2cidrs -{ - my (@cidrs) = sort_network_blocks(@_); - my @result; - while (@cidrs) { - my (@r) = shift(@cidrs); - my $max = $r[0]->{IBASE} + $r[0]->size; - while ($cidrs[0] && $cidrs[0]->{IBASE} <= $max) { - my $nm = $cidrs[0]->{IBASE} + $cidrs[0]->size; - $max = $nm if $nm > $max; - push(@r, shift(@cidrs)); - } - my $start = $r[0]->{IBASE}; - my $end = $max - 1; - push(@result, irange2cidrlist($start, $end)); - } - return @result; -} - -sub cidrs2inverse -{ - my $outer = shift; - $outer = __PACKAGE__->new($outer) unless ref($outer); - my (@cidrs) = cidrs2cidrs(@_); - my $first = $outer->{IBASE}; - my $last = $first + $outer->size() -1; - shift(@cidrs) while $cidrs[0] && $cidrs[0]->{IBASE} + $cidrs[0]->size < $first; - my @r; - while (@cidrs && $first < $last) { - if ($first < $cidrs[0]->{IBASE}) { - if ($last <= $cidrs[0]->{IBASE}-1) { - return (@r, irange2cidrlist($first, $last)); - } - push(@r, irange2cidrlist($first, $cidrs[0]->{IBASE}-1)); - } - last if $cidrs[0]->{IBASE} > $last; - $first = $cidrs[0]->{IBASE} + $cidrs[0]->size; - shift(@cidrs); - } - if ($first < $last) { - push(@r, irange2cidrlist($first, $last)); - } - return @r; -} - -sub by_net_netmask_block -{ - $a->{'IBASE'} <=> $b->{'IBASE'} - || $a->{'BITS'} <=> $b->{'BITS'}; -} - -sub sameblock -{ - return ! cmpblocks(@_); -} - -sub cmpblocks -{ - my $this = shift; - my $class = ref $this; - my $other = (ref $_[0]) ? 
shift : $class->new(@_); - return cmp_net_netmask_block($this, $other); -} - -sub contains -{ - my $this = shift; - my $class = ref $this; - my $other = (ref $_[0]) ? shift : $class->new(@_); - return 0 if $this->{IBASE} > $other->{IBASE}; - return 0 if $this->{BITS} > $other->{BITS}; - return 0 if $other->{IBASE} > $this->{IBASE} + $this->size -1; - return 1; -} - -sub cmp_net_netmask_block -{ - return ($_[0]->{IBASE} <=> $_[1]->{IBASE} - || $_[0]->{BITS} <=> $_[1]->{BITS}); -} - -sub sort_network_blocks -{ - return - map $_->[0], - sort { $a->[1] <=> $b->[1] || $a->[2] <=> $b->[2] } - map [ $_, $_->{IBASE}, $_->{BITS} ], @_; - -} - -sub sort_by_ip_address -{ - return - map $_->[0], - sort { $a->[1] cmp $b->[1] } - map [ $_, pack("C4",split(/\./,$_)) ], @_; - -} - -BEGIN { - for (my $i = 0; $i <= 32; $i++) { - $imask[$i] = imask($i); - $imask2bits{$imask[$i]} = $i; - $quadmask2bits{int2quad($imask[$i])} = $i; - $quadhostmask2bits{int2quad(~$imask[$i])} = $i; - $size2bits{ 2**(32-$i) } = $i; - } -} -1; diff --git a/lib/mogdeps/Perlbal.pm b/lib/mogdeps/Perlbal.pm deleted file mode 100644 index eceabb26..00000000 --- a/lib/mogdeps/Perlbal.pm +++ /dev/null @@ -1,1377 +0,0 @@ -#!/usr/bin/perl -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. -# - -=head1 NAME - -Perlbal - Reverse-proxy load balancer and webserver - -=head1 SEE ALSO - - http://www.danga.com/perlbal/ - -=head1 COPYRIGHT AND LICENSE - -Copyright 2004, Danga Interactive, Inc. -Copyright 2005-2007, Six Apart, Ltd. - -You can use and redistribute Perlbal under the same terms as Perl itself. 
- -=cut - -package Perlbal; - -BEGIN { - # keep track of anonymous subs' origins: - $^P |= 0x200; -} - -my $has_gladiator = eval "use Devel::Gladiator; 1;"; -my $has_cycle = eval "use Devel::Cycle; 1;"; -use Devel::Peek; - -use vars qw($VERSION); -$VERSION = '1.73'; - -use constant DEBUG => $ENV{PERLBAL_DEBUG} || 0; -use constant DEBUG_OBJ => $ENV{PERLBAL_DEBUG_OBJ} || 0; -use constant TRACK_STATES => $ENV{PERLBAL_TRACK_STATES} || 0; # if on, track states for "state changes" command - -use strict; -use warnings; -no warnings qw(deprecated); - -use Storable (); -use IO::Socket; -use IO::Handle; -use IO::File; - -$Perlbal::SYSLOG_AVAILABLE = eval { require Sys::Syslog; 1; }; -$Perlbal::BSD_RESOURCE_AVAILABLE = eval { require BSD::Resource; 1; }; - -# incremented every second by a timer: -$Perlbal::tick_time = time(); - -# Set to 1 when we open syslog, and 0 when we close it -$Perlbal::syslog_open = 0; - -use Getopt::Long; -use Carp qw(cluck croak); -use Errno qw(EBADF); -use POSIX (); - -our(%TrackVar); -sub track_var { - my ($name, $ref) = @_; - $TrackVar{$name} = $ref; -} - -use Perlbal::AIO; -use Perlbal::HTTPHeaders; -use Perlbal::Service; -use Perlbal::Socket; -use Perlbal::TCPListener; -use Perlbal::UploadListener; -use Perlbal::ClientManage; -use Perlbal::ClientHTTPBase; -use Perlbal::ClientProxy; -use Perlbal::ClientHTTP; -use Perlbal::BackendHTTP; -use Perlbal::ReproxyManager; -use Perlbal::Pool; -use Perlbal::ManageCommand; -use Perlbal::CommandContext; -use Perlbal::Util; - -$SIG{'PIPE'} = "IGNORE"; # handled manually - -our(%hooks); # hookname => subref -our(%service); # servicename -> Perlbal::Service -our(%pool); # poolname => Perlbal::Pool -our(%plugins); # plugin => 1 (shows loaded plugins) -our($last_error); -our $service_autonumber = 1; # used to generate names for anonymous services created with Perlbal->create_service() -our $vivify_pools = 1; # if on, allow automatic creation of pools -our $foreground = 1; # default to foreground -our $track_obj 
= 0; # default to not track creation locations -our $reqs = 0; # total number of requests we've done -our $starttime = time(); # time we started -our $pidfile = ''; # full path, default to not writing pidfile -# used by pidfile (only makes sense before run started) -# don't rely on this variable, it might change. -our $run_started = 0; -our ($lastutime, $laststime, $lastreqs) = (0, 0, 0); # for deltas - -our %PluginCase = (); # lowercase plugin name -> as file is named - -# setup XS status data structures -our %XSModules; # ( 'headers' => 'Perlbal::XS::HTTPHeaders' ) - -# now include XS files -eval "use Perlbal::XS::HTTPHeaders;"; # if we have it, load it - -# activate modules as necessary -if ($ENV{PERLBAL_XS_HEADERS} && $XSModules{headers}) { - Perlbal::XS::HTTPHeaders::enable(); -} - -# setup a USR1 signal handler that tells us to dump some basic statistics -# of how we're doing to the syslog -$SIG{'USR1'} = sub { - my $dumper = sub { Perlbal::log('info', $_[0]); }; - foreach my $svc (values %service) { - run_manage_command("show service $svc->{name}", $dumper); - } - run_manage_command('states', $dumper); - run_manage_command('queues', $dumper); -}; - -sub error { - $last_error = shift; - return 0; -} - -# Object instance counts, for debugging and leak detection -our(%ObjCount); # classname -> instances -our(%ObjTotal); # classname -> instances -our(%ObjTrack); # "$objref" -> creation location -sub objctor { - if (DEBUG_OBJ) { - my $ref = ref $_[0]; - $ref .= "-$_[1]" if $_[1]; - $ObjCount{$ref}++; - $ObjTotal{$ref}++; - - # now, if we're tracing leaks, note this object's creation location - if ($track_obj) { - my $i = 1; - my @list; - while (my $sub = (caller($i++))[3]) { - push @list, $sub; - } - $ObjTrack{"$_[0]"} = [ time, join(', ', @list) ]; - } - } -} -sub objdtor { - if (DEBUG_OBJ) { - my $ref = ref $_[0]; - $ref .= "-$_[1]" if $_[1]; - $ObjCount{$ref}--; - - # remove tracking for this object - if ($track_obj) { - delete $ObjTrack{"$_[0]"}; - } - } -} - 
-sub register_global_hook { - $hooks{$_[0]} = $_[1]; - return 1; -} - -sub unregister_global_hook { - delete $hooks{$_[0]}; - return 1; -} - -sub run_global_hook { - my $hookname = shift; - my $ref = $hooks{$hookname}; - return $ref->(@_) if defined $ref; # @_ is $mc (a Perlbal::ManageCommand) - return undef; -} - -sub service_names { - return sort keys %service; -} - -# class method: given a service name, returns a service object -sub service { - my $class = shift; - return $service{$_[0]}; -} - -sub create_service { - my $class = shift; - my $name = shift; - - unless (defined($name)) { - $name = "____auto_".($service_autonumber++); - } - - croak("service '$name' already exists") if $service{$name}; - croak("pool '$name' already exists") if $pool{$name}; - - # Create the new service and return it - return $service{$name} = Perlbal::Service->new($name); -} - -sub pool { - my $class = shift; - return $pool{$_[0]}; -} - -# given some plugin name, return its correct case -sub plugin_case { - my $pname = lc shift; - return $PluginCase{$pname} || $pname; -} - -# run a block of commands. returns true if they all passed -sub run_manage_commands { - my ($cmd_block, $out, $ctx) = @_; - - $ctx ||= Perlbal::CommandContext->new; - foreach my $cmd (split(/\n/, $cmd_block)) { - return 0 unless Perlbal::run_manage_command($cmd, $out, $ctx); - } - return 1; -} - -# allows ${ip:eth0} in config. 
currently the only supported expansion -sub _expand_config_var { - my $cmd = shift; - $cmd =~ /^(\w+):(.+)/ - or die "Unknown config variable: $cmd\n"; - my ($type, $val) = ($1, $2); - if ($type eq "ip") { - die "Bogus-looking iface name" unless $val =~ /^\w+$/; - my $conf = `/sbin/ifconfig $val`; - $conf =~ /inet addr:(\S+)/ - or die "Can't find IP of interface '$val'"; - return $1; - } - die "Unknown config variable type: $type\n"; -} - -# returns 1 if command succeeded, 0 otherwise -sub run_manage_command { - my ($cmd, $out, $ctx) = @_; # $out is output stream closure - - $cmd =~ s/\#.*//; - $cmd =~ s/^\s+//; - $cmd =~ s/\s+$//; - $cmd =~ s/\s+/ /g; - - my $orig = $cmd; # save original case for some commands - $cmd =~ s/^([^=]+)/lc $1/e; # lowercase everything up to an = - return 1 unless $cmd =~ /^\S/; - - # expand variables - $cmd =~ s/\$\{(.+?)\}/_expand_config_var($1)/eg; - $cmd =~ s/\$(\w+)/$ENV{$1}/g; - - $out ||= sub {}; - $ctx ||= Perlbal::CommandContext->new; - - my $err = sub { - $out->("ERROR: $_[0]"); - return 0; - }; - my $ok = sub { - $out->("OK") if $ctx->verbose; - return 1; - }; - - return $err->("invalid command") unless $cmd =~ /^(\w+)/; - my $basecmd = $1; - - my $mc = Perlbal::ManageCommand->new($basecmd, $cmd, $out, $ok, $err, $orig, $ctx); - - # for testing auto crashing and recovery: - if ($basecmd eq "crash") { die "Intentional crash." 
}; - - no strict 'refs'; - my $handler; - if ($Perlbal::{"MANAGE_$basecmd"} && ($handler = *{"MANAGE_$basecmd"}{CODE})) { - my $rv = eval { $handler->($mc); }; - return $mc->err($@) if $@; - return $rv; - } - - # if no handler found, look for plugins - - # call any hooks if they've been defined - my $rval = eval { run_global_hook("manage_command.$basecmd", $mc); }; - return $mc->err($@) if $@; - if (defined $rval) { - # commands may return boolean, or arrayref to mass-print - if (ref $rval eq "ARRAY") { - $mc->out($_) foreach @$rval; - return 1; - } - return $rval; - } - - return $mc->err("unknown command: $basecmd"); -} - -sub arena_ref_counts { - my $all = Devel::Gladiator::walk_arena(); - my %ct; - - my %run_cycle; - foreach my $it (@$all) { - $ct{ref $it}++; - if (ref $it eq "CODE") { - my $name = Devel::Peek::CvGV($it); - $ct{$name}++ if $name =~ /ANON/; - } - } - $all = undef; - return \%ct; -} - -my %last_gladiator; -sub MANAGE_gladiator { - my $mc = shift->no_opts; - unless ($has_gladiator) { - $mc->end; - return; - } - - my $ct = arena_ref_counts(); - my $ret; - $ret .= "ARENA COUNTS:\n"; - foreach my $k (sort {$ct->{$b} <=> $ct->{$a}} keys %$ct) { - my $delta = $ct->{$k} - ($last_gladiator{$k} || 0); - $last_gladiator{$k} = $ct->{$k}; - next unless $ct->{$k} > 1; - $ret .= sprintf(" %4d %-4d $k\n", $ct->{$k}, $delta); - } - - $mc->out($ret); - $mc->end; -} - -sub MANAGE_varsize { - my $mc = shift->no_opts; - - my $emit; - $emit = sub { - my ($v, $depth, $name) = @_; - $name ||= ""; - - my $show; - if (ref $v eq "ARRAY") { - return unless @$v; - $show = "[] " . scalar @$v; - } - elsif (ref $v eq "HASH") { - return unless %$v; - $show = "{} " . 
scalar keys %$v; - } - else { - $show = " = $v"; - } - my $pre = " " x $depth; - $mc->out("$pre$name $show"); - - if (ref $v eq "HASH") { - foreach my $k (sort keys %$v) { - $emit->($v->{$k}, $depth+1, "{$k}"); - } - } - }; - - foreach my $k (sort keys %TrackVar) { - my $v = $TrackVar{$k} or next; - $emit->($v, 0, $k); - } - - $mc->end; -} - -sub MANAGE_obj { - my $mc = shift->no_opts; - - foreach (sort keys %ObjCount) { - $mc->out("$_ = $ObjCount{$_} (tot=$ObjTotal{$_})"); - } - $mc->end; -} - -sub MANAGE_verbose { - my $mc = shift->parse(qr/^verbose (on|off)$/, - "usage: VERBOSE {on|off}"); - my $onoff = $mc->arg(1); - $mc->{ctx}->verbose(lc $onoff eq 'on' ? 1 : 0); - return $mc->ok; -} - -sub MANAGE_shutdown { - my $mc = shift->parse(qr/^shutdown(\s?graceful)?\s?(\d+)?$/); - - # immediate shutdown - exit(0) unless $mc->arg(1); - - # set connect ahead to 0 for all services so they don't spawn extra backends - foreach my $svc (values %service) { - $svc->{connect_ahead} = 0; - } - - # tell all sockets we're doing a graceful stop - my $sf = Perlbal::Socket->get_sock_ref; - foreach my $k (keys %$sf) { - my Perlbal::Socket $v = $sf->{$k}; - $v->die_gracefully if $v->can("die_gracefully"); - } - - # register a post loop callback that will end the event loop when we only have - # a single socket left, the AIO socket - Perlbal::Socket->SetPostLoopCallback(sub { - my ($descmap, $otherfds) = @_; - - # Ghetto: duplicate the code we already had for our postloopcallback - Perlbal::Socket::run_callbacks(); - - # see what we have here; make sure we have no Clients and no unbored Backends - foreach my $sock (values %$descmap) { - my $ref = ref $sock; - return 1 if $ref =~ /^Perlbal::Client/ && $ref ne 'Perlbal::ClientManage'; - return 1 if $sock->isa('Perlbal::BackendHTTP') && $sock->{state} ne 'bored'; - } - return 0; # end the event loop and thus we exit perlbal - }); - - # If requested, register a callback to kill the perlbal process after a specified number of seconds - if 
(my $timeout = $mc->arg(2)) { - Perlbal::Socket::register_callback($timeout, sub { exit(0); }); - } - - # so they know something happened - return $mc->ok; -} - -sub MANAGE_mime { - my $mc = shift->parse(qr/^mime(?:\s+(\w+)(?:\s+(\w+))?(?:\s+(\S+))?)?$/); - my ($cmd, $arg1, $arg2) = ($mc->arg(1), $mc->arg(2), $mc->arg(3)); - - if (!$cmd || $cmd eq 'list') { - foreach my $key (sort keys %$Perlbal::ClientHTTPBase::MimeType) { - $mc->out("$key $Perlbal::ClientHTTPBase::MimeType->{$key}"); - } - $mc->end; - } elsif ($cmd eq 'set') { - if (!$arg1 || !$arg2) { - return $mc->err("Usage: set "); - } - - $Perlbal::ClientHTTPBase::MimeType->{$arg1} = $arg2; - return $mc->out("$arg1 set to $arg2."); - } elsif ($cmd eq 'remove') { - if (delete $Perlbal::ClientHTTPBase::MimeType->{$arg1}) { - return $mc->out("$arg1 removed."); - } else { - return $mc->err("$arg1 not a defined extension."); - } - } else { - return $mc->err("Usage: list, remove , add "); - } -} - -sub MANAGE_xs { - my $mc = shift->parse(qr/^xs(?:\s+(\w+)\s+(\w+))?$/); - my ($cmd, $module) = ($mc->arg(1), $mc->arg(2)); - - if ($cmd) { - # command? verify - return $mc->err('Known XS modules: ' . join(', ', sort keys %XSModules) . '.') - unless $XSModules{$module}; - - # okay, so now enable or disable this module - if ($cmd eq 'enable') { - my $res = eval "return $XSModules{$module}::enable();"; - return $mc->err("Unable to enable module.") - unless $res; - return $mc->ok; - } elsif ($cmd eq 'disable') { - my $res = eval "return $XSModules{$module}::disable();"; - return $mc->err("Unable to disable module.") - unless $res; - return $mc->out("Module disabled."); - } else { - return $mc->err('Usage: xs [ ]'); - } - } else { - # no commands, so just check status - $mc->out('XS module status:', ''); - foreach my $module (sort keys %XSModules) { - my $class = $XSModules{$module}; - my $enabled = eval "return \$${class}::Enabled;"; - my $status = defined $enabled ? ($enabled ? 
"installed, enabled" : - "installed, disabled") : "not installed"; - $mc->out(" $module: $status"); - } - $mc->out(' No modules available.') unless %XSModules; - $mc->out(''); - $mc->out("To enable a module: xs enable "); - $mc->out("To disable a module: xs disable "); - } - $mc->end; -} - -sub MANAGE_fd { - my $mc = shift->no_opts; - return $mc->err('This command is not available unless BSD::Resource is installed') unless $Perlbal::BSD_RESOURCE_AVAILABLE; - - # called in list context on purpose, but we want the hard limit - my (undef, $max) = BSD::Resource::getrlimit(BSD::Resource::RLIMIT_NOFILE()); - my $ct = 0; - - # first try procfs if one exists, as that's faster than iterating - if (opendir(DIR, "/proc/self/fd")) { - my @dirs = readdir(DIR); - $ct = scalar(@dirs) - 2; # don't count . and .. - closedir(DIR); - } else { - # isatty() is cheap enough to do on everything - foreach (0..$max) { - my $res = POSIX::isatty($_); - $ct++ if $res || ($! != EBADF); - } - } - $mc->out("max $max"); - $mc->out("cur $ct"); - $mc->end; -} - -sub MANAGE_proc { - my $mc = shift->no_opts; - - $mc->out('time: ' . time()); - $mc->out('pid: ' . $$); - - - if ($Perlbal::BSD_RESOURCE_AVAILABLE) { - my $ru = BSD::Resource::getrusage(); - my ($ut, $st) = ($ru->utime, $ru->stime); - my ($udelta, $sdelta) = ($ut - $lastutime, $st - $laststime); - $mc->out("utime: $ut (+$udelta)"); - $mc->out("stime: $st (+$sdelta)"); - ($lastutime, $laststime, $lastreqs) = ($ut, $st, $reqs); - } - - my $rdelta = $reqs - $lastreqs; - $mc->out("reqs: $reqs (+$rdelta)"); - $lastreqs = $reqs; - - $mc->end; -} - -sub MANAGE_nodes { - my $mc = shift->parse(qr/^nodes?(?:\s+(\d+.\d+.\d+.\d+)(?::(\d+))?)?$/); - - my ($ip, $port) = ($mc->arg(1), $mc->arg(2) || 80); - my $spec_ipport = $ip ? 
"$ip:$port" : undef; - my $ref = \%Perlbal::BackendHTTP::NodeStats; - - my $dump = sub { - my $ipport = shift; - foreach my $key (keys %{$ref->{$ipport}}) { - if (ref $ref->{$ipport}->{$key} eq 'ARRAY') { - my %temp; - $temp{$_}++ foreach @{$ref->{$ipport}->{$key}}; - foreach my $tkey (keys %temp) { - $mc->out("$ipport $key $tkey $temp{$tkey}"); - } - } else { - $mc->out("$ipport $key $ref->{$ipport}->{$key}"); - } - } - }; - - # dump a node, or all nodes - if ($spec_ipport) { - $dump->($spec_ipport); - } else { - foreach my $ipport (keys %$ref) { - $dump->($ipport); - } - } - - $mc->end; -} - -# singular also works for the nodes command -*MANAGE_node = \&MANAGE_nodes; - -sub MANAGE_prof { - my $mc = shift->parse(qr/^prof\w*\s+(on|off|data)$/); - my $which = $mc->arg(1); - - if ($which eq 'on') { - if (Danga::Socket->EnableProfiling) { - return $mc->ok; - } else { - return $mc->err('Unable to enable profiling. Please ensure you have the BSD::Resource module installed.'); - } - } - - if ($which eq 'off') { - Danga::Socket->DisableProfiling; - return $mc->ok; - } - - if ($which eq 'data') { - my $href = Danga::Socket->ProfilingData; - foreach my $key (sort keys %$href) { - my ($utime, $stime, $calls) = @{$href->{$key}}; - $mc->out(sprintf("%s %0.5f %0.5f %d %0.7f %0.7f", - $key, $utime, $stime, $calls, $utime / $calls, $stime / $calls)); - } - $mc->end; - } -} - -sub MANAGE_uptime { - my $mc = shift->no_opts; - - $mc->out("starttime $starttime"); - $mc->out("uptime " . 
(time() - $starttime)); - $mc->out("version $Perlbal::VERSION"); - $mc->end; -} - -*MANAGE_version = \&MANAGE_uptime; - -sub MANAGE_track { - my $mc = shift->no_opts; - - my $now = time(); - my @list; - foreach (keys %ObjTrack) { - my $age = $now - $ObjTrack{$_}->[0]; - push @list, [ $age, "${age}s $_: $ObjTrack{$_}->[1]" ]; - } - - # now output based on sorted age - foreach (sort { $a->[0] <=> $b->[0] } @list) { - $mc->out($_->[1]); - } - $mc->end; -} - -sub MANAGE_socks { - my $mc = shift->parse(qr/^socks(?: (\w+))?$/); - my $mode = $mc->arg(1) || "all"; - - my $sf = Perlbal::Socket->get_sock_ref; - - if ($mode eq "summary") { - my %count; - my $write_buf = 0; - my $open_files = 0; - while (my $k = each %$sf) { - my Perlbal::Socket $v = $sf->{$k}; - $count{ref $v}++; - $write_buf += $v->{write_buf_size}; - if ($v->isa("Perlbal::ClientHTTPBase")) { - my Perlbal::ClientHTTPBase $cv = $v; - $open_files++ if $cv->{'reproxy_fh'}; - } - } - - foreach (sort keys %count) { - $mc->out(sprintf("%5d $_", $count{$_})); - } - $mc->out(); - $mc->out(sprintf("Aggregate write buffer: %.1fk", $write_buf / 1024)); - $mc->out(sprintf(" Open files: %d", $open_files)); - } elsif ($mode eq "all") { - my $now = time; - $mc->out(sprintf("%5s %6s", "fd", "age")); - foreach (sort { $a <=> $b } keys %$sf) { - my $sock = $sf->{$_}; - my $age; - eval { - $age = $now - $sock->{create_time}; - }; - $age ||= 0; - $mc->out(sprintf("%5d %5ds %s", $_, $age, $sock->as_string)); - } - } - $mc->end; -} - -sub MANAGE_backends { - my $mc = shift->no_opts; - - my $sf = Perlbal::Socket->get_sock_ref; - my %nodes; # { "Backend" => int count } - foreach my $sock (values %$sf) { - if ($sock->isa("Perlbal::BackendHTTP")) { - my Perlbal::BackendHTTP $cv = $sock; - $nodes{"$cv->{ipport}"}++; - } - } - - # now print out text - foreach my $node (sort keys %nodes) { - $mc->out("$node " . 
$nodes{$node}); - } - - $mc->end; -} - -sub MANAGE_noverify { - my $mc = shift->no_opts; - - # shows the amount of time left for each node marked as noverify - my $now = time; - foreach my $ipport (keys %Perlbal::BackendHTTP::NoVerify) { - my $until = $Perlbal::BackendHTTP::NoVerify{$ipport} - $now; - $mc->out("$ipport $until"); - } - $mc->end; -} - -sub MANAGE_pending { - my $mc = shift->no_opts; - - # shows pending backend connections by service, node, and age - my %pend; # { "service" => { "ip:port" => age } } - my $now = time; - - foreach my $svc (values %service) { - foreach my $ipport (keys %{$svc->{pending_connects}}) { - my Perlbal::BackendHTTP $be = $svc->{pending_connects}->{$ipport}; - next unless defined $be; - $pend{$svc->{name}}->{$ipport} = $now - $be->{create_time}; - } - } - - foreach my $name (sort keys %pend) { - foreach my $ipport (sort keys %{$pend{$name}}) { - $mc->out("$name $ipport $pend{$name}{$ipport}"); - } - } - $mc->end; -} - -sub MANAGE_states { - my $mc = shift->parse(qr/^states(?:\s+(.+))?$/); - - my $svc; - if (defined $mc->arg(1)) { - $svc = $service{$mc->arg(1)}; - return $mc->err("Service not found.") - unless defined $svc; - } - - my $sf = Perlbal::Socket->get_sock_ref; - - my %states; # { "Class" => { "State" => int count; } } - foreach my $sock (values %$sf) { - next unless $sock->can('state'); - my $state = $sock->state; - next unless defined $state; - if (defined $svc) { - next unless $sock->isa('Perlbal::ClientProxy') || - $sock->isa('Perlbal::BackendHTTP') || - $sock->isa('Perlbal::ClientHTTP'); - next unless $sock->{service} == $svc; - } - $states{ref $sock}->{$state}++; - } - - # now print out text - foreach my $class (sort keys %states) { - foreach my $state (sort keys %{$states{$class}}) { - $mc->out("$class $state " . 
$states{$class}->{$state}); - } - } - $mc->end; -} - -sub MANAGE_queues { - my $mc = shift->no_opts; - my $now = time; - - foreach my $svc (values %service) { - next unless $svc->{role} eq 'reverse_proxy'; - - my %queues = ( - normal => 'waiting_clients', - highpri => 'waiting_clients_highpri', - lowpri => 'waiting_clients_lowpri', - ); - - while (my ($queue_name, $clients_key) = each %queues) { - my $age = 0; - my $count = @{$svc->{$clients_key}}; - my Perlbal::ClientProxy $oldest = $svc->{$clients_key}->[0]; - $age = $now - $oldest->{last_request_time} if defined $oldest; - $mc->out("$svc->{name}-$queue_name.age $age"); - $mc->out("$svc->{name}-$queue_name.count $count"); - } - } - $mc->end; -} - -sub MANAGE_state { - my $mc = shift->parse(qr/^state changes$/); - my $hr = Perlbal::Socket->get_statechange_ref; - my %final; # { "state" => count } - while (my ($obj, $arref) = each %$hr) { - $mc->out("$obj: " . join(', ', @$arref)); - $final{$arref->[-1]}++; - } - foreach my $k (sort keys %final) { - $mc->out("$k $final{$k}"); - } - $mc->end; -} - -sub MANAGE_leaks { - my $mc = shift->parse(qr/^leaks(?:\s+(.+))?$/); - return $mc->err("command disabled without \$ENV{PERLBAL_DEBUG} set") - unless $ENV{PERLBAL_DEBUG}; - - my $what = $mc->arg(1); - - # iterates over active objects. if you specify an argument, it is treated as code - # with $_ being the reference to the object. - # shows objects that we think might have been leaked - my $ref = Perlbal::Socket::get_created_objects_ref; - foreach (@$ref) { - next unless $_; # might be undef! 
- if ($what) { - my $rv = eval "$what"; - return $mc->err("$@") if $@; - next unless defined $rv; - $mc->out($rv); - } else { - $mc->out($_->as_string); - } - } - $mc->end; -} - -sub MANAGE_show { - my $mc = shift; - - if ($mc->cmd =~ /^show service (\w+)$/) { - my $sname = $1; - my Perlbal::Service $svc = $service{$sname}; - return $mc->err("Unknown service") unless $svc; - $svc->stats_info($mc->out); - return $mc->end; - } - - if ($mc->cmd =~ /^show pool(?:\s+(\w+))?$/) { - my $pool = $1; - if ($pool) { - my $pl = $pool{$pool}; - return $mc->err("pool '$pool' does not exist") unless $pl; - - foreach my $node (@{ $pl->nodes }) { - my $ipport = "$node->[0]:$node->[1]"; - $mc->out($ipport . " " . $pl->node_used($ipport)); - } - } else { - foreach my $name (sort keys %pool) { - my Perlbal::Pool $pl = $pool{$name}; - $mc->out("$name nodes $pl->{node_count}"); - $mc->out("$name services $pl->{use_count}"); - } - } - return $mc->end; - } - - if ($mc->cmd =~ /^show service$/) { - foreach my $name (sort keys %service) { - my $svc = $service{$name}; - my $listen = $svc->{listen} || "not_listening"; - $mc->out("$name $listen " . ($svc->{enabled} ? 
"ENABLED" : "DISABLED")); - } - return $mc->end; - } - - return $mc->parse_error; -} - -sub MANAGE_server { - my $mc = shift->parse(qr/^server (\S+) ?= ?(.+)$/); - my ($key, $val) = ($mc->arg(1), $mc->arg(2)); - - if ($key =~ /^max_reproxy_connections(?:\((.+)\))?/) { - return $mc->err("Expected numeric parameter") unless $val =~ /^-?\d+$/; - my $hostip = $1; - if (defined $hostip) { - $Perlbal::ReproxyManager::ReproxyMax{$hostip} = $val+0; - } else { - $Perlbal::ReproxyManager::ReproxyGlobalMax = $val+0; - } - return $mc->ok; - } - - if ($key eq "max_connections") { - return $mc->err('This command is not available unless BSD::Resource is installed') unless $Perlbal::BSD_RESOURCE_AVAILABLE; - return $mc->err("Expected numeric parameter") unless $val =~ /^-?\d+$/; - my $rv = BSD::Resource::setrlimit(BSD::Resource::RLIMIT_NOFILE(), $val, $val); - unless (defined $rv && $rv) { - if ($> == 0) { - $mc->err("Unable to set limit."); - } else { - $mc->err("Need to be root to increase max connections."); - } - } - return $mc->ok; - } - - if ($key eq "nice_level") { - return $mc->err("Expected numeric parameter") unless $val =~ /^-?\d+$/; - my $rv = POSIX::nice($val); - $mc->err("Unable to renice: $!") - unless defined $rv; - return $mc->ok; - } - - if ($key eq "aio_mode") { - return $mc->err("Unknown AIO mode") unless $val =~ /^none|linux|ioaio$/; - return $mc->err("Linux::AIO no longer supported") if $val eq "linux"; - return $mc->err("IO::AIO not available") if $val eq "ioaio" && ! 
$Perlbal::OPTMOD_IO_AIO; - $Perlbal::AIO_MODE = $val; - return $mc->ok; - } - - if ($key eq "aio_threads") { - return $mc->err("Expected numeric parameter") unless $val =~ /^-?\d+$/; - IO::AIO::min_parallel($val) - if $Perlbal::OPTMOD_IO_AIO; - return $mc->ok; - } - - if ($key eq "track_obj") { - return $mc->err("Expected 1 or 0") unless $val eq '1' || $val eq '0'; - $track_obj = $val + 0; - %ObjTrack = () if $val; # if we're turning it on, clear it out - return $mc->ok; - } - - if ($key eq "pidfile") { - return $mc->err("pidfile must be configured at startup, before Perlbal::run is called") if $run_started; - return $mc->err("Expected full pathname to pidfile") unless $val; - $pidfile = $val; - return $mc->ok; - } - - if ($key eq "crash_backtrace") { - return $mc->err("Expected 1 or 0") unless $val eq '1' || $val eq '0'; - if ($val) { - $SIG{__DIE__} = sub { Carp::confess(@_) }; - } else { - $SIG{__DIE__} = undef; - } - return $mc->ok; - } - - return $mc->err("unknown server option '$val'"); -} - -sub MANAGE_dumpconfig { - my $mc = shift; - - while (my ($name, $pool) = each %pool) { - $mc->out("CREATE POOL $name"); - - if ($pool->can("dumpconfig")) { - foreach my $line ($pool->dumpconfig) { - $mc->out(" $line"); - } - } else { - my $class = ref($pool); - $mc->out(" # Pool class '$class' is unable to dump config."); - } - } continue { - $mc->out(""); - } - - while (my ($name, $service) = each %service) { - $mc->out("CREATE SERVICE $name"); - - if ($service->can("dumpconfig")) { - foreach my $line ($service->dumpconfig) { - $mc->out(" $line"); - } - } else { - my $class = ref($service); - $mc->out(" # Service class '$class' is unable to dump config."); - } - - my $state = $service->{enabled} ? 
"ENABLE" : "DISABLE"; - $mc->out("$state $name"); - } continue { - $mc->out(""); - } - - return $mc->ok -} - -sub MANAGE_reproxy_state { - my $mc = shift; - Perlbal::ReproxyManager::dump_state($mc->out); - return 1; -} - -sub MANAGE_create { - my $mc = shift->parse(qr/^create (service|pool) (\w+)$/, - "usage: CREATE {service|pool} "); - my ($what, $name) = $mc->args; - - if ($what eq "service") { - return $mc->err("service '$name' already exists") if $service{$name}; - return $mc->err("pool '$name' already exists") if $pool{$name}; - Perlbal->create_service($name); - $mc->{ctx}{last_created} = $name; - return $mc->ok; - } - - if ($what eq "pool") { - return $mc->err("pool '$name' already exists") if $pool{$name}; - return $mc->err("service '$name' already exists") if $service{$name}; - $vivify_pools = 0; - $pool{$name} = Perlbal::Pool->new($name); - $mc->{ctx}{last_created} = $name; - return $mc->ok; - } -} - -sub MANAGE_use { - my $mc = shift->parse(qr/^use (\w+)$/, - "usage: USE "); - my ($name) = $mc->args; - return $mc->err("Non-existent pool or service '$name'") unless $pool{$name} || $service{$name}; - - $mc->{ctx}{last_created} = $name; - return $mc->ok; -} - -sub MANAGE_pool { - my $mc = shift->parse(qr/^pool (\w+) (\w+) (\d+.\d+.\d+.\d+)(?::(\d+))?$/); - my ($cmd, $name, $ip, $port) = $mc->args; - $port ||= 80; - - my $good_cmd = qr/^(?:add|remove)$/; - - # "add" and "remove" can be in either order - ($cmd, $name) = ($name, $cmd) if $name =~ /$good_cmd/; - return $mc->err("Invalid command: must be 'add' or 'remove'") - unless $cmd =~ /$good_cmd/; - - my $pl = $pool{$name}; - return $mc->err("Pool '$name' not found") unless $pl; - $pl->$cmd($ip, $port); - return $mc->ok; -} - -sub MANAGE_set { - my $mc = shift->parse(qr/^set (?:(\w+)[\. 
])?([\w\.]+) ?= ?(.+)$/, - "usage: SET [] = "); - my ($name, $key, $val) = $mc->args; - unless ($name ||= $mc->{ctx}{last_created}) { - return $mc->err("omitted service/pool name not implied from context"); - } - - if (my Perlbal::Service $svc = $service{$name}) { - return $svc->set($key, $val, $mc); - } elsif (my Perlbal::Pool $pl = $pool{$name}) { - return $pl->set($key, $val, $mc); - } - return $mc->err("service/pool '$name' does not exist"); -} - - -sub MANAGE_header { - my $mc = shift->parse(qr/^header\s+(\w+)\s+(insert|remove)\s+(.+?)(?:\s*:\s*(.+))?$/i, - "Usage: HEADER {INSERT|REMOVE}
[: ]"); - - my ($svc_name, $action, $header, $val) = $mc->args; - my $svc = $service{$svc_name}; - return $mc->err("service '$svc_name' does not exist") unless $svc; - return $svc->header_management($action, $header, $val, $mc); -} - -sub MANAGE_enable { - my $mc = shift->parse(qr/^(disable|enable) (\w+)$/, - "Usage: {ENABLE|DISABLE} "); - my ($verb, $name) = $mc->args; - my $svc = $service{$name}; - return $mc->err("service '$name' does not exist") unless $svc; - return $svc->$verb($mc); -} -*MANAGE_disable = \&MANAGE_enable; - -sub MANAGE_unload { - my $mc = shift->parse(qr/^unload (\w+)$/); - my ($fn) = $mc->args; - $fn = $PluginCase{lc $fn}; - my $rv = eval "Perlbal::Plugin::$fn->unload; 1;"; - $plugins{$fn} = 0; - return $mc->ok; -} - - -sub MANAGE_load { - my $mc = shift->parse(qr/^load \w+$/); - - my $fn; - $fn = $1 if $mc->orig =~ /^load (\w+)$/i; - - my $last_case; - my $last_class; - - my $load = sub { - my $name = shift; - $last_case = $name; - my $class = $last_class = "Perlbal::Plugin::$name"; - my $rv = eval "use $class; $class->load; 1;"; - return $mc->err($@) if ! $rv && $@ !~ /^Can\'t locate/; - return $rv; - }; - - my $rv = $load->($fn) || $load->(lc $fn) || $load->(ucfirst lc $fn); - return $mc->err($@) unless $rv; - - $PluginCase{lc $fn} = $last_case; - $plugins{$last_case} = $last_class; - - return $mc->ok; -} - -sub MANAGE_reload { - my $mc = shift->parse(qr/^reload (\w+)$/); - my ($fn) = $mc->args; - - my $class = $PluginCase{lc $fn} or - return $mc->err("Unknown/unloaded plugin '$fn'"); - $class = "Perlbal::Plugin::$class"; - - eval "$class->can_reload" or - return $mc->err("Plugin $class doesn't support reloading"); - - if ($class->can("pre_reload_unload")) { - eval "$class->pre_reload_unload; 1" or - return $mc->err("Error running $class->pre_reload_unload: $@"); - } - - eval "$class->unload; 1;" or - return $mc->err("Failed to unload $class: $@"); - - my $file = $class . 
".pm"; - $file =~ s!::!/!g; - - delete $INC{$file} or - die $mc->err("Didn't find $file in %INC"); - - no warnings 'redefine'; - eval "use $class; $class->load; 1;" or - return $mc->err("Failed to reload: $@"); - - return $mc->ok; -} - -sub MANAGE_plugins { - my $mc = shift->no_opts; - foreach my $svc (values %service) { - next unless @{$svc->{plugin_order}}; - $mc->out(join(' ', $svc->{name}, @{$svc->{plugin_order}})); - } - $mc->end; -} - -sub MANAGE_help { - my $mc = shift->no_opts; - my @commands = sort map { m/^MANAGE_(\S+)$/ ? $1 : () } - keys %Perlbal::; - foreach my $command (@commands) { - $mc->out("$command"); - } - $mc->end; -} - -sub MANAGE_aio { - my $mc = shift->no_opts; - my $stats = Perlbal::AIO::get_aio_stats(); - foreach my $c (sort keys %$stats) { - my $r = $stats->{$c}; - foreach my $k (keys %$r) { - $mc->out("$c $k $r->{$k}"); - } - } - $mc->end; -} - -sub load_config { - my ($file, $writer) = @_; - open (my $fh, $file) or die "Error opening config file ($file): $!\n"; - my $ctx = Perlbal::CommandContext->new; - $ctx->verbose(0); - while (my $line = <$fh>) { - return 0 unless run_manage_command($line, $writer, $ctx); - } - close($fh); - return 1; -} - -sub daemonize { - my($pid, $sess_id, $i); - - # note that we're not in the foreground (for logging purposes) - $foreground = 0; - - # required before fork: (as of old Linux::AIO 1.1, still true?) 
- IO::AIO::max_parallel(0) - if $Perlbal::OPTMOD_IO_AIO; - - ## Fork and exit parent - if ($pid = fork) { exit 0; } - - ## Detach ourselves from the terminal - croak "Cannot detach from controlling terminal" - unless $sess_id = POSIX::setsid(); - - ## Prevent possibility of acquiring a controlling terminal - $SIG{'HUP'} = 'IGNORE'; - if ($pid = fork) { exit 0; } - - ## Change working directory - chdir "/"; - - ## Clear file creation mask - umask 0; - - ## Close open file descriptors - close(STDIN); - close(STDOUT); - close(STDERR); - - ## Reopen stderr, stdout, stdin to /dev/null - open(STDIN, "+>/dev/null"); - open(STDOUT, "+>&STDIN"); - open(STDERR, "+>&STDIN"); -} - -# For other apps using Danga::Socket that want to embed Perlbal, this can be called -# directly to start it up. You can call this as many times as you like; it'll -# only actually do what it does the first time it's called. -sub initialize { - unless ($run_started) { - $run_started = 1; - - # number of AIO threads. the number of outstanding requests isn't - # affected by this - IO::AIO::min_parallel(3) if $Perlbal::OPTMOD_IO_AIO; - - # register IO::AIO pipe which gets written to from threads - # doing blocking IO - if ($Perlbal::OPTMOD_IO_AIO) { - Perlbal::Socket->AddOtherFds(IO::AIO::poll_fileno() => - \&IO::AIO::poll_cb); - } - - # The fact that this only runs the first time someone calls initialize() - # means that some things which depend on it might be unreliable when - # used in an embedded perlbal if there is a race for multiple components - # to call initialize(). - run_global_hook("pre_event_loop"); - } -} - -# This is the function to call if you want Perlbal to be in charge of the event loop. -# It won't return until Perlbal is somehow told to exit. 
-sub run { - - # setup for logging - Sys::Syslog::openlog('perlbal', 'pid', 'daemon') if $Perlbal::SYSLOG_AVAILABLE; - $Perlbal::syslog_open = 1; - Perlbal::log('info', 'beginning run'); - my $pidfile_written = 0; - $pidfile_written = _write_pidfile( $pidfile ) if $pidfile; - - Perlbal::initialize(); - - Danga::Socket->SetLoopTimeout(1000); - Danga::Socket->SetPostLoopCallback(sub { - $Perlbal::tick_time = time(); - Perlbal::Socket::run_callbacks(); - return 1; - }); - - # begin the overall loop to try to capture if Perlbal dies at some point - # so we can have a log of it - eval { - # wait for activity - Perlbal::Socket->EventLoop(); - }; - - my $clean_exit = 1; - - # closing messages - if ($@) { - Perlbal::log('crit', "crash log: $_") foreach split(/\r?\n/, $@); - $clean_exit = 0; - } - - # Note: This will only actually remove the pidfile on 'shutdown graceful' - # A more reliable approach might be to have a pidfile object which fires - # removal on DESTROY. - _remove_pidfile( $pidfile ) if $pidfile_written; - - Perlbal::log('info', 'ending run'); - $Perlbal::syslog_open = 0; - Sys::Syslog::closelog() if $Perlbal::SYSLOG_AVAILABLE; - - return $clean_exit; -} - -sub log { - # simple logging functionality - if ($foreground) { - # syslog acts like printf so we have to use printf and append a \n - shift; # ignore the first parameter (info, warn, crit, etc) - printf(shift(@_) . "\n", @_); - } else { - # just pass the parameters to syslog - Sys::Syslog::syslog(@_) if $Perlbal::syslog_open; - } -} - - -sub _write_pidfile { - my $file = shift; - - my $fh; - unless (open($fh, ">$file")) { - Perlbal::log('info', "couldn't create pidfile '$file': $!" ); - return 0; - } - unless ((print $fh "$$\n") && close($fh)) { - Perlbal::log('info', "couldn't write into pidfile '$file': $!" 
); - _remove_pidfile($file); - return 0; - } - return 1; -} - - -sub _remove_pidfile { - my $file = shift; - - unlink $file; - return 1; -} - - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: - -1; diff --git a/lib/mogdeps/Perlbal/AIO.pm b/lib/mogdeps/Perlbal/AIO.pm deleted file mode 100644 index fac72abb..00000000 --- a/lib/mogdeps/Perlbal/AIO.pm +++ /dev/null @@ -1,284 +0,0 @@ -# AIO abstraction layer -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. - -package Perlbal::AIO; - -use strict; -use POSIX qw(ENOENT EACCES EBADF); -use Fcntl qw(SEEK_CUR SEEK_SET SEEK_END O_RDWR O_CREAT O_TRUNC); - -# Try and use IO::AIO, if it's around. -BEGIN { - $Perlbal::OPTMOD_IO_AIO = eval "use IO::AIO 1.6 (); 1;"; -} - -END { - IO::AIO::max_parallel(0) - if $Perlbal::OPTMOD_IO_AIO; -} - -$Perlbal::AIO_MODE = "none"; -$Perlbal::AIO_MODE = "ioaio" if $Perlbal::OPTMOD_IO_AIO; - -############################################################################ -# AIO functions available to callers -############################################################################ - -sub aio_readahead { - my ($fh, $offset, $length, $user_cb) = @_; - - aio_channel_push(get_chan(), $user_cb, sub { - my $cb = shift; - if ($Perlbal::AIO_MODE eq "ioaio") { - IO::AIO::aio_readahead($fh, $offset, $length, $cb); - } else { - $cb->(); - } - }); -} - -sub aio_stat { - my ($file, $user_cb) = @_; - - aio_channel_push(get_chan($file), $user_cb, sub { - my $cb = shift; - if ($Perlbal::AIO_MODE eq "ioaio") { - IO::AIO::aio_stat($file, $cb); - } else { - stat($file); - $cb->(); - } - }); -} - -sub aio_open { - my ($file, $flags, $mode, $user_cb) = @_; - - aio_channel_push(get_chan($file), $user_cb, sub { - my $cb = shift; - - if ($Perlbal::AIO_MODE eq "ioaio") { - IO::AIO::aio_open($file, $flags, $mode, $cb); - } else { - my $fh; - my $rv = sysopen($fh, $file, $flags, $mode); - $cb->($rv ? 
$fh : undef); - } - }); -} - -sub aio_unlink { - my ($file, $user_cb) = @_; - aio_channel_push(get_chan($file), $user_cb, sub { - my $cb = shift; - - if ($Perlbal::AIO_MODE eq "ioaio") { - IO::AIO::aio_unlink($file, $cb); - } else { - my $rv = unlink($file); - $rv = $rv ? 0 : -1; - $cb->($rv); - } - }); -} - -sub aio_write { - # 0 1 2 3(data) 4 - my ($fh, $offset, $length, undef, $user_cb) = @_; - return no_fh($user_cb) unless $fh; - my $alist = \@_; - - aio_channel_push(get_chan(), $user_cb, sub { - my $cb = shift; - if ($Perlbal::AIO_MODE eq "ioaio") { - IO::AIO::aio_write($fh, $offset, $length, $alist->[3], 0, $cb); - } else { - my $old_off = sysseek($fh, 0, SEEK_CUR); - sysseek($fh, $offset, 0); - my $rv = syswrite($fh, $alist->[3], $length, 0); - sysseek($fh, $old_off, SEEK_SET); - $cb->($rv); - } - }); -} - -sub aio_read { - # 0 1 2 3(data) 4 - my ($fh, $offset, $length, undef, $user_cb) = @_; - return no_fh($user_cb) unless $fh; - my $alist = \@_; - - aio_channel_push(get_chan(), $user_cb, sub { - my $cb = shift; - if ($Perlbal::AIO_MODE eq "ioaio") { - IO::AIO::aio_read($fh, $offset, $length, $alist->[3], 0, $cb); - } else { - my $old_off = sysseek($fh, 0, SEEK_CUR); - sysseek($fh, $offset, 0); - my $rv = sysread($fh, $alist->[3], $length, 0); - sysseek($fh, $old_off, SEEK_SET); - $cb->($rv); - } - }); -} - -############################################################################ -# AIO channel stuff -# prevents all AIO threads from being consumed by requests for same -# failing/overloaded disk by isolating them into separate 'channels' in -# parent process and not dispatching more than the max in-flight count -# allows. think of a channel as a named queue. or in reality, a disk. -############################################################################ - -my %chan_outstanding; # $channel_name -> $num_in_flight -my %chan_pending; # $channel_name -> [ [$subref, $cb], .... 
] -my %chan_hitmaxdepth; # $channel_name -> $times_enqueued (not dispatched immediately) -my %chan_submitct; # $channel_name -> $times_submitted (total AIO requests for this channel) -my $use_aio_chans = 0; # keep them off for now, until mogstored code is ready to use them -my $file_to_chan_hook; # coderef that returns $chan_name given a $filename - -my %chan_concurrency; # $channel_name -> concurrency per channel - # (cache. definitive version via function call) - -sub get_aio_stats { - my $ret = {}; - foreach my $c (keys %chan_outstanding) { - $ret->{$c} = { - cur_running => $chan_outstanding{$c}, - ctr_queued => $chan_hitmaxdepth{$c} || 0, - ctr_total => $chan_submitct{$c}, - }; - } - - foreach my $c (keys %chan_pending) { - my $rec = $ret->{$c} ||= {}; - $rec->{cur_queued} = scalar @{$chan_pending{$c}}; - } - - return $ret; -} - -# (external API). set trans hook, but also enables AIO channels. -sub set_file_to_chan_hook { - $file_to_chan_hook = shift; # coderef that returns $chan_name given a $filename - $use_aio_chans = 1; -} - -# internal API: -sub aio_channel_push { - my ($chan, $user_cb, $action) = @_; - - # if we were to do it immediately, bypassing AIO channels (future option?) - unless ($use_aio_chans) { - $action->($user_cb); - return; - } - - # IO::AIO/etc only take one callback. so we wrap the user - # (caller) function with our own that first calls theirs, then - # does our bookkeeping and queue management afterwards. - my $wrapped_cb = sub { - $user_cb->(@_); - $chan_outstanding{$chan}--; - aio_channel_cond_run($chan); - }; - - # in case this is the first time this queue has been used, init stuff: - my $chanpend = ($chan_pending{$chan} ||= []); - $chan_outstanding{$chan} ||= 0; - $chan_submitct{$chan}++; - - my $max_out = $chan_concurrency{$chan} ||= aio_chan_max_concurrent($chan); - - if ($chan_outstanding{$chan} < $max_out) { - $chan_outstanding{$chan}++; - $action->($wrapped_cb); - return; - } else { - # too deep. enqueue. 
- $chan_hitmaxdepth{$chan}++; - push @$chanpend, [$action, $wrapped_cb]; - } -} - -sub aio_chan_max_concurrent { - my ($chan) = @_; - return 100 if $chan eq '[default]'; - return 10; -} - -sub aio_channel_cond_run { - my ($chan) = @_; - - my $chanpend = $chan_pending{$chan} or return; - my $max_out = $chan_concurrency{$chan} ||= aio_chan_max_concurrent($chan); - - my $job; - while ($chan_outstanding{$chan} < $max_out && ($job = shift @$chanpend)) { - $chan_outstanding{$chan}++; - $job->[0]->($job->[1]); - } -} - -my $next_chan; -sub set_channel { - $next_chan = shift; -} - -sub set_file_for_channel { - my ($file) = @_; - if ($file_to_chan_hook) { - $next_chan = $file_to_chan_hook->($file); - } else { - $next_chan = undef; - } -} - -# gets currently-set channel, then clears it. or if none set, -# lets registered hook set the channel name from the optional -# $file parameter. the default channel, '[default]' has no limits -sub get_chan { - return undef unless $use_aio_chans; - my ($file) = @_; - set_file_for_channel($file) if $file; - - if (my $chan = $next_chan) { - $next_chan = undef; - return $chan; - } - - return "[default]"; -} - -############################################################################ -# misc util functions -############################################################################ - -sub _fh_of_fd_mode { - my ($fd, $mode) = @_; - return undef unless defined $fd && $fd >= 0; - - #TODO: use the write MODE for the given $mode; - my $fh = IO::Handle->new_from_fd($fd, 'r+'); - my $num = fileno($fh); - return $fh; -} - -sub no_fh { - my $cb = shift; - - my $i = 1; - my $stack_trace = ""; - while (my ($pkg, $filename, $line, $subroutine, $hasargs, - $wantarray, $evaltext, $is_require, $hints, $bitmask) = caller($i++)) { - $stack_trace .= " at $filename:$line $subroutine\n"; - } - - Perlbal::log("crit", "Undef \$fh: $stack_trace"); - $cb->(undef); - return undef; -} - -1; diff --git a/lib/mogdeps/Perlbal/BackendHTTP.pm 
b/lib/mogdeps/Perlbal/BackendHTTP.pm deleted file mode 100644 index f3d0b382..00000000 --- a/lib/mogdeps/Perlbal/BackendHTTP.pm +++ /dev/null @@ -1,755 +0,0 @@ -###################################################################### -# HTTP connection to backend node -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. -# - -package Perlbal::BackendHTTP; -use strict; -use warnings; -no warnings qw(deprecated); - -use base "Perlbal::Socket"; -use fields ('client', # Perlbal::ClientProxy connection, or undef - 'service', # Perlbal::Service - 'pool', # Perlbal::Pool; whatever pool we spawned from - 'ip', # IP scalar - 'port', # port scalar - 'ipport', # "$ip:$port" - 'reportto', # object; must implement reporter interface - - 'has_attention', # has been accepted by a webserver and - # we know for sure we're not just talking - # to the TCP stack - - 'waiting_options', # if true, we're waiting for an OPTIONS * - # response to determine when we have attention - - 'disconnect_at', # time this connection will be disconnected, - # if it's kept-alive and backend told us. - # otherwise undef for unknown. - - # The following only apply when the backend server sends - # a content-length header - 'content_length', # length of document being transferred - 'content_length_remain', # bytes remaining to be read - - 'use_count', # number of requests this backend's been used for - 'generation', # int; counts what generation we were spawned in - 'buffered_upload_mode', # bool; if on, we're doing a buffered upload transmit - - 'scratch' # for plugins - ); -use Socket qw(PF_INET IPPROTO_TCP SOCK_STREAM SOL_SOCKET SO_ERROR - AF_UNIX PF_UNSPEC - ); -use IO::Handle; - -use Perlbal::ClientProxy; - -# if this is made too big, (say, 128k), then perl does malloc instead -# of using its slab cache. 
-use constant BACKEND_READ_SIZE => 61449; # 60k, to fit in a 64k slab - -# keys set here when an endpoint is found to not support persistent -# connections and/or the OPTIONS method -our %NoVerify; # { "ip:port" => next-verify-time } -our %NodeStats; # { "ip:port" => { ... } }; keep statistics about nodes - -# constructor for a backend connection takes a service (pool) that it's -# for, and uses that service to get its backend IP/port, as well as the -# client that will be using this backend connection. final parameter is -# an options hashref that contains some options: -# reportto => object obeying reportto interface -sub new { - my Perlbal::BackendHTTP $self = shift; - my ($svc, $ip, $port, $opts) = @_; - $opts ||= {}; - - my $sock; - socket $sock, PF_INET, SOCK_STREAM, IPPROTO_TCP; - - unless ($sock && defined fileno($sock)) { - Perlbal::log('crit', "Error creating socket: $!"); - return undef; - } - my $inet_aton = Socket::inet_aton($ip); - unless ($inet_aton) { - Perlbal::log('crit', "inet_aton failed creating socket for $ip"); - return undef; - } - - IO::Handle::blocking($sock, 0); - connect $sock, Socket::sockaddr_in($port, $inet_aton); - - $self = fields::new($self) unless ref $self; - $self->SUPER::new($sock); - - Perlbal::objctor($self); - - $self->{ip} = $ip; # backend IP - $self->{port} = $port; # backend port - $self->{ipport} = "$ip:$port"; # often used as key - $self->{service} = $svc; # the service we're serving for - $self->{pool} = $opts->{pool}; # what pool we came from. 
- $self->{reportto} = $opts->{reportto} || $svc; # reportto if specified - $self->state("connecting"); - - # mark another connection to this ip:port - $NodeStats{$self->{ipport}}->{attempts}++; - $NodeStats{$self->{ipport}}->{lastattempt} = $self->{create_time}; - - # setup callback in case we get stuck in connecting land - Perlbal::Socket::register_callback(15, sub { - if ($self->state eq 'connecting' || $self->state eq 'verifying_backend') { - # shouldn't still be connecting/verifying ~15 seconds after create - $self->close('callback_timeout'); - } - return 0; - }); - - # for header reading: - $self->init; - - $self->watch_write(1); - return $self; -} - -sub init { - my $self = shift; - $self->{req_headers} = undef; - $self->{res_headers} = undef; # defined w/ headers object once all headers in - $self->{headers_string} = ""; # blank to start - $self->{generation} = $self->{service}->{generation}; - $self->{read_size} = 0; # total bytes read from client - - $self->{client} = undef; # Perlbal::ClientProxy object, initially empty - # until we ask our service for one - - $self->{has_attention} = 0; - $self->{use_count} = 0; - $self->{buffered_upload_mode} = 0; -} - - -sub new_process { - my ($class, $svc, $prog) = @_; - - my ($psock, $csock); - socketpair($csock, $psock, AF_UNIX, SOCK_STREAM, PF_UNSPEC) - or die "socketpair: $!"; - - $csock->autoflush(1); - $psock->autoflush(1); - - my $pid = fork; - unless (defined $pid) { - warn "fork failed: $!\n"; - return undef; - } - - # child process - unless ($pid) { - close(STDIN); - close(STDOUT); - #close(STDERR); - open(STDIN, '<&', $psock); - open(STDOUT, '>&', $psock); - #open(STDERR, ">/dev/null"); - exec $prog; - } - - close($psock); - my $sock = $csock; - - my $self = fields::new($class); - $self->SUPER::new($sock); - Perlbal::objctor($self); - - $self->{ipport} = $prog; # often used as key - $self->{service} = $svc; # the service we're serving for - $self->{reportto} = $svc; # reportto interface (same as service) - 
$self->state("connecting"); - - $self->init; - $self->watch_write(1); - return $self; -} - -sub close { - my Perlbal::BackendHTTP $self = shift; - - # OSX Gives EPIPE on bad connects, and doesn't fail the connect - # so lets treat EPIPE as a event_err so the logic there does - # the right thing - if (defined $_[0] && $_[0] eq 'EPIPE') { - $self->event_err; - return; - } - - # don't close twice - return if $self->{closed}; - - # this closes the socket and sets our closed flag - $self->SUPER::close(@_); - - # tell our client that we're gone - if (my $client = $self->{client}) { - $client->backend(undef); - $self->{client} = undef; - } - - # tell our owner that we're gone - if (my $reportto = $self->{reportto}) { - $reportto->note_backend_close($self); - $self->{reportto} = undef; - } -} - -# return our defined generation counter with no parameter, -# or set our generation if given a parameter -sub generation { - my Perlbal::BackendHTTP $self = $_[0]; - return $self->{generation} unless $_[1]; - return $self->{generation} = $_[1]; -} - -# return what ip and port combination we're using -sub ipport { - my Perlbal::BackendHTTP $self = $_[0]; - return $self->{ipport}; -} - -# called to tell backend that the client has gone on to do something else now. -sub forget_client { - my Perlbal::BackendHTTP $self = $_[0]; - $self->{client} = undef; -} - -# called by service when it's got a client for us, or by ourselves -# when we asked for a client. -# returns true if client assignment was accepted. 
-sub assign_client { - my Perlbal::BackendHTTP $self = shift; - my Perlbal::ClientProxy $client = shift; - return 0 if $self->{client}; - - my $svc = $self->{service}; - - # set our client, and the client's backend to us - $svc->mark_node_used($self->{ipport}); - $self->{client} = $client; - $self->state("sending_req"); - $self->{client}->backend($self); - - my Perlbal::HTTPHeaders $hds = $client->{req_headers}->clone; - $self->{req_headers} = $hds; - - my $client_ip = $client->peer_ip_string; - - # I think I've seen this be undef in practice. Double-check - unless ($client_ip) { - warn "Undef client_ip ($client) in assign_client. Closing."; - $client->close; - return 0; - } - - # Use HTTP/1.0 to backend (FIXME: use 1.1 and support chunking) - $hds->set_version("1.0"); - - my $persist = $svc->{persist_backend}; - - $hds->header("Connection", $persist ? "keep-alive" : "close"); - - if ($svc->{enable_reproxy}) { - $hds->header("X-Proxy-Capabilities", "reproxy-file"); - } - - # decide whether we trust the upstream or not, to give us useful - # forwarding info headers - if ($svc->trusted_ip($client_ip)) { - # yes, we trust our upstream, so just append our client's IP - # to the existing list of forwarded IPs, if we're a blind proxy - # then don't append our IP to the end of the list. 
- unless ($svc->{blind_proxy}) { - my @ips = split /,\s*/, ($hds->header("X-Forwarded-For") || ''); - $hds->header("X-Forwarded-For", join ", ", @ips, $client_ip); - } - } else { - # no, don't trust upstream (untrusted client), so remove all their - # forwarding headers and tag their IP as the x-forwarded-for - $hds->header("X-Forwarded-For", $client_ip); - $hds->header("X-Host", undef); - $hds->header("X-Forwarded-Host", undef); - } - - $self->tcp_cork(1); - $client->state('backend_req_sent'); - - $self->{content_length} = undef; - $self->{content_length_remain} = undef; - - # run hooks - return 1 if $svc->run_hook('backend_client_assigned', $self); - - # now cleanup the headers before we send to the backend - $svc->munge_headers($hds) if $svc; - - $self->write($hds->to_string_ref); - $self->write(sub { - $self->tcp_cork(0); - if (my $client = $self->{client}) { - # start waiting on a reply - $self->watch_read(1); - $self->state("wait_res"); - $client->state('wait_res'); - $client->backend_ready($self); - } - }); - - return 1; -} - -# called by ClientProxy after we tell it our backend is ready and -# it has an upload ready on disk -sub invoke_buffered_upload_mode { - my Perlbal::BackendHTTP $self = shift; - - # so, we're receiving a buffered upload, we need to go ahead and - # start the buffered upload retransmission to backend process. we - # have to turn watching for writes on, since that's what is doing - # the triggering, NOT the normal client proxy watch for read - $self->{buffered_upload_mode} = 1; - $self->watch_write(1); -} - -# Backend -sub event_write { - my Perlbal::BackendHTTP $self = shift; - print "Backend $self is writeable!\n" if Perlbal::DEBUG >= 2; - - my $now = time(); - delete $NoVerify{$self->{ipport}} if - defined $NoVerify{$self->{ipport}} && - $NoVerify{$self->{ipport}} < $now; - - if (! 
$self->{client} && $self->{state} eq "connecting") { - # not interested in writes again until something else is - $self->watch_write(0); - $NodeStats{$self->{ipport}}->{connects}++; - $NodeStats{$self->{ipport}}->{lastconnect} = $now; - - # OSX returns writeable even if the connect fails - # so explicitly check for the error - # TODO: make a smaller test case and show to the world - if (my $error = unpack('i', getsockopt($self->{sock}, SOL_SOCKET, SO_ERROR))) { - $self->event_err; - return; - } - - if (defined $self->{service} && $self->{service}->{verify_backend} && - !$self->{has_attention} && !defined $NoVerify{$self->{ipport}}) { - - return if $self->{service}->run_hook('backend_write_verify', $self); - - # the backend should be able to answer this incredibly quickly. - $self->write("OPTIONS " . $self->{service}->{verify_backend_path} . " HTTP/1.0\r\nConnection: keep-alive\r\n\r\n"); - $self->watch_read(1); - $self->{waiting_options} = 1; - $self->{content_length_remain} = undef; - $self->state("verifying_backend"); - } else { - # register our boredom (readiness for a client/request) - $self->state("bored"); - $self->{reportto}->register_boredom($self); - } - return; - } - - # if we have a client, and we're currently doing a buffered upload - # sendfile, then tell the client to continue sending us data - if ($self->{client} && $self->{buffered_upload_mode}) { - $self->{client}->continue_buffered_upload($self); - return; - } - - my $done = $self->write(undef); - $self->watch_write(0) if $done; -} - -sub verify_success { - my Perlbal::BackendHTTP $self = shift; - $self->{waiting_options} = 0; - $self->{has_attention} = 1; - $NodeStats{$self->{ipport}}->{verifies}++; - $self->next_request(1); # initial - return; -} - -sub verify_failure { - my Perlbal::BackendHTTP $self = shift; - $NoVerify{$self->{ipport}} = time() + 60; - $self->{reportto}->note_bad_backend_connect($self); - $self->close('no_keep_alive'); - return; -} - -sub event_read_waiting_options { # : void 
- my Perlbal::BackendHTTP $self = shift; - - if (defined $self->{service}) { - return if $self->{service}->run_hook('backend_readable_verify', $self); - } - - if ($self->{content_length_remain}) { - # the HTTP/1.1 spec says OPTIONS responses can have content-lengths, - # but the meaning of the response is reserved for a future spec. - # this just gobbles it up for. - my $bref = $self->read(BACKEND_READ_SIZE); - return $self->verify_failure unless defined $bref; - $self->{content_length_remain} -= length($$bref); - } elsif (my $hd = $self->read_response_headers) { - # see if we have keep alive support - return $self->verify_failure unless $hd->res_keep_alive_options; - $self->{content_length_remain} = $hd->header("Content-Length"); - } - - # if we've got the option response and read any response data - # if present: - if ($self->{res_headers} && ! $self->{content_length_remain}) { - $self->verify_success; - } - return; -} - -sub handle_response { # : void - my Perlbal::BackendHTTP $self = shift; - my Perlbal::HTTPHeaders $hd = $self->{res_headers}; - my Perlbal::ClientProxy $client = $self->{client}; - - print "BackendHTTP: handle_response\n" if Perlbal::DEBUG >= 2; - - my $res_code = $hd->response_code; - - # keep a rolling window of the last 500 response codes - my $ref = ($NodeStats{$self->{ipport}}->{responsecodes} ||= []); - push @$ref, $res_code; - if (scalar(@$ref) > 500) { - shift @$ref; - } - - # call service response received function - return if $self->{reportto}->backend_response_received($self); - - # standard handling - $self->state("xfer_res"); - $client->state("xfer_res"); - $self->{has_attention} = 1; - - # RFC 2616, Sec 4.4: Messages MUST NOT include both a - # Content-Length header field and a non-identity - # transfer-coding. If the message does include a non- - # identity transfer-coding, the Content-Length MUST be - # ignored. 
- my $te = $hd->header("Transfer-Encoding"); - if ($te && $te !~ /\bidentity\b/i) { - $hd->header("Content-Length", undef); - } - - my Perlbal::HTTPHeaders $rqhd = $self->{req_headers}; - - # setup our content length so we know how much data to expect, in general - # we want the content-length from the response, but if this was a head request - # we know it's a 0 length message the client wants - if ($rqhd->request_method eq 'HEAD') { - $self->{content_length} = 0; - } else { - $self->{content_length} = $hd->content_length; - } - $self->{content_length_remain} = $self->{content_length} || 0; - - my $reproxy_cache_for = $hd->header('X-REPROXY-CACHE-FOR') || 0; - - # special cases: reproxying and retrying after server errors: - if ((my $rep = $hd->header('X-REPROXY-FILE')) && $self->may_reproxy) { - # make the client begin the async IO while we move on - $self->next_request; - $client->start_reproxy_file($rep, $hd); - return; - } elsif ((my $urls = $hd->header('X-REPROXY-URL')) && $self->may_reproxy) { - $self->next_request; - $self->{service}->add_to_reproxy_url_cache($rqhd, $hd) - if $reproxy_cache_for; - $client->start_reproxy_uri($hd, $urls); - return; - } elsif ((my $svcname = $hd->header('X-REPROXY-SERVICE')) && $self->may_reproxy) { - $self->next_request; - $self->{client} = undef; - $client->start_reproxy_service($hd, $svcname); - return; - } elsif ($res_code == 500 && - $rqhd->request_method =~ /^GET|HEAD$/ && - $client->should_retry_after_500($self)) { - # eh, 500 errors are rare. just close and don't spend effort reading - # rest of body's error message to no client. 
- $self->close; - - # and tell the client to try again with a new backend - $client->retry_after_500($self->{service}); - return; - } - - # regular path: - my $res_source = $client->{primary_res_hdrs} || $hd; - my $thd = $client->{res_headers} = $res_source->clone; - - # setup_keepalive will set Connection: and Keep-Alive: headers for us - # as well as setup our HTTP version appropriately - $client->setup_keepalive($thd); - - # if we had an alternate primary response header, make sure - # we send the real content-length (from the reproxied URL) - # and not the one the first server gave us - if ($client->{primary_res_hdrs}) { - $thd->header('Content-Length', $hd->header('Content-Length')); - $thd->header('X-REPROXY-FILE', undef); - $thd->header('X-REPROXY-URL', undef); - $thd->header('X-REPROXY-EXPECTED-SIZE', undef); - $thd->header('X-REPROXY-CACHE-FOR', undef); - - # also update the response code, in case of 206 partial content - my $rescode = $hd->response_code; - if ($rescode == 206 || $rescode == 416) { - $thd->code($rescode); - $thd->header('Accept-Ranges', $hd->header('Accept-Ranges')) if $hd->header('Accept-Ranges'); - $thd->header('Content-Range', $hd->header('Content-Range')) if $hd->header('Content-Range'); - } - $thd->code(200) if $thd->response_code == 204; # upgrade HTTP No Content (204) to 200 OK. - } - - print " writing response headers to client\n" if Perlbal::DEBUG >= 3; - $client->write($thd->to_string_ref); - - print(" content_length=", (defined $self->{content_length} ? $self->{content_length} : "(undef)"), - " remain=", (defined $self->{content_length_remain} ? $self->{content_length_remain} : "(undef)"), "\n") - if Perlbal::DEBUG >= 3; - - if (defined $self->{content_length} && ! $self->{content_length_remain}) { - print " done. 
detaching.\n" if Perlbal::DEBUG >= 3; - # order important: next_request detaches us from client, so - # $client->close can't kill us - $self->next_request; - $client->write(sub { - $client->backend_finished; - }); - } -} - -sub may_reproxy { - my Perlbal::BackendHTTP $self = shift; - my Perlbal::Service $svc = $self->{service}; - return 0 unless $svc; - return $svc->{enable_reproxy}; -} - -# Backend -sub event_read { - my Perlbal::BackendHTTP $self = shift; - print "Backend $self is readable!\n" if Perlbal::DEBUG >= 2; - - return $self->event_read_waiting_options if $self->{waiting_options}; - - my Perlbal::ClientProxy $client = $self->{client}; - - # with persistent connections, sometimes we have a backend and - # no client, and backend becomes readable, either to signal - # to use the end of the stream, or because a bad request error, - # which I can't totally understand. in any case, we have - # no client so all we can do is close this backend. - return $self->close('read_with_no_client') unless $client; - - unless ($self->{res_headers}) { - return unless $self->read_response_headers; - return $self->handle_response; - } - - # if our client's behind more than the max limit, stop buffering - if ($client->too_far_behind_backend) { - $self->watch_read(0); - $client->{backend_stalled} = 1; - return; - } - - my $bref = $self->read(BACKEND_READ_SIZE); - - if (defined $bref) { - $client->write($bref); - - # HTTP/1.0 keep-alive support to backend. we just count bytes - # until we hit the end, then we know we can send another - # request on this connection - if ($self->{content_length}) { - $self->{content_length_remain} -= length($$bref); - if (! 
$self->{content_length_remain}) { - # order important: next_request detaches us from client, so - # $client->close can't kill us - $self->next_request; - $client->write(sub { $client->backend_finished; }); - } - } - return; - } else { - # backend closed - print "Backend $self is done; closing...\n" if Perlbal::DEBUG >= 1; - - $client->backend(undef); # disconnect ourselves from it - $self->{client} = undef; # .. and it from us - $self->close('backend_disconnect'); # close ourselves - - $client->write(sub { $client->backend_finished; }); - return; - } -} - -# if $initial is on, then don't increment use count -sub next_request { - my Perlbal::BackendHTTP $self = $_[0]; - my $initial = $_[1]; - - # don't allow this if we're closed - return if $self->{closed}; - - # set alive_time so reproxy can intelligently reuse this backend - my $now = time(); - $self->{alive_time} = $now; - $NodeStats{$self->{ipport}}->{requests}++ unless $initial; - $NodeStats{$self->{ipport}}->{lastresponse} = $now; - - my $hd = $self->{res_headers}; # response headers - - # verify that we have keep-alive support. by passing $initial to res_keep_alive, - # we signal that req_headers may be undef (if we just did an options request) - return $self->close('next_request_no_persist') - unless $hd->res_keep_alive($self->{req_headers}, $initial); - - # and now see if we should closed based on the pool we're from - return $self->close('pool_requested_closure') - if $self->{pool} && ! $self->{pool}->backend_should_live($self); - - # we've been used - $self->{use_count}++ unless $initial; - - # service specific - if (my Perlbal::Service $svc = $self->{service}) { - # keep track of how many times we've been used, and don't - # keep using this connection more times than the service - # is configured for. 
- if ($svc->{max_backend_uses} && ($self->{use_count} > $svc->{max_backend_uses})) { - return $self->close('exceeded_max_uses'); - } - } - - # if backend told us, keep track of when the backend - # says it's going to boot us, so we don't use it within - # a few seconds of that time - if (($hd->header("Keep-Alive") || '') =~ /\btimeout=(\d+)/i) { - $self->{disconnect_at} = $now + $1; - } else { - $self->{disconnect_at} = undef; - } - - $self->{client} = undef; - - $self->state("bored"); - $self->watch_write(0); - - $self->{req_headers} = undef; - $self->{res_headers} = undef; - $self->{headers_string} = ""; - $self->{req_headers} = undef; - - $self->{read_size} = 0; - $self->{content_length_remain} = undef; - $self->{content_length} = undef; - $self->{buffered_upload_mode} = 0; - - $self->{reportto}->register_boredom($self); - return; -} - -# Backend: bad connection to backend -sub event_err { - my Perlbal::BackendHTTP $self = shift; - - # FIXME: we get this after backend is done reading and we disconnect, - # hence the misc checks below for $self->{client}. - - print "BACKEND event_err\n" if - Perlbal::DEBUG >= 2; - - if ($self->{client}) { - # request already sent to backend, then an error occurred. - # we don't want to duplicate POST requests, so for now - # just fail - # TODO: if just a GET request, retry? 
- $self->{client}->close('backend_error'); - $self->close('error'); - return; - } - - if ($self->{state} eq "connecting" || - $self->{state} eq "verifying_backend") { - # then tell the service manager that this connection - # failed, so it can spawn a new one and note the dead host - $self->{reportto}->note_bad_backend_connect($self, 1); - } - - # close ourselves first - $self->close("error"); -} - -# Backend -sub event_hup { - my Perlbal::BackendHTTP $self = shift; - print "HANGUP for $self\n" if Perlbal::DEBUG; - $self->close("after_hup"); -} - -sub as_string { - my Perlbal::BackendHTTP $self = shift; - - my $ret = $self->SUPER::as_string; - my $name = $self->{sock} ? getsockname($self->{sock}) : undef; - my $lport = $name ? (Socket::sockaddr_in($name))[0] : undef; - $ret .= ": localport=$lport" if $lport; - if (my Perlbal::ClientProxy $cp = $self->{client}) { - $ret .= "; client=$cp->{fd}"; - } - $ret .= "; uses=$self->{use_count}; $self->{state}"; - if (defined $self->{service} && $self->{service}->{verify_backend}) { - $ret .= "; has_attention="; - $ret .= $self->{has_attention} ? 'yes' : 'no'; - } - - return $ret; -} - -sub die_gracefully { - # see if we need to die - my Perlbal::BackendHTTP $self = shift; - $self->close('graceful_death') if $self->state eq 'bored'; -} - -sub DESTROY { - Perlbal::objdtor($_[0]); - $_[0]->SUPER::DESTROY; -} - -1; - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/Cache.pm b/lib/mogdeps/Perlbal/Cache.pm deleted file mode 100644 index 8bff5b37..00000000 --- a/lib/mogdeps/Perlbal/Cache.pm +++ /dev/null @@ -1,244 +0,0 @@ -# (This is a copy of Cache::SimpleLRU.) -# License to use and redistribute this under the same terms as Perl itself. 
- -package Perlbal::Cache; - -use strict; -use fields qw(items size tail head maxsize); - -use vars qw($VERSION); -use constant PREVREF => 0; # ptr left, to newer item -use constant VALUE => 1; -use constant NEXTREF => 2; # ptr right, to older item -use constant KEY => 3; # copy of key for unlinking from namespace on fallout - -$VERSION = '1.0'; - -sub new { - my $class = shift; - my $self = fields::new($class); - my $args = @_ == 1 ? $_[0] : { @_ }; - - $self->{head} = undef, - $self->{tail} = undef, - $self->{items} = {}; # key -> arrayref, indexed by constants above - $self->{size} = 0; - $self->{maxsize} = $args->{maxsize}+0; - return $self; -} - -# need to DESTROY to cleanup doubly-linked list (circular refs) -sub DESTROY { - my $self = shift; - $self->set_maxsize(0); - $self->validate_list; -} - -# calls $code->($val) for each value in cache. $code must return true -# to continue walking. foreach returns true if you hit the end. -sub foreach { - my Perlbal::Cache $self = shift; - my $code = shift; - my $iter = $self->{head}; - while ($iter) { - my $val = $iter->[VALUE]; - $iter = $iter->[NEXTREF]; - last unless $code->($val); - } - return $iter ? 0 : 1; -} - -sub size { - my Perlbal::Cache $self = shift; - return $self->{size}; -} - -sub maxsize { - my Perlbal::Cache $self = shift; - return $self->{maxsize}; -} - -sub set_maxsize { - my ($self, $maxsize) = @_; - $self->{maxsize} = $maxsize; - $self->drop_tail while - $self->{size} > $self->{maxsize}; -} - -# For debugging only -sub validate_list { - my ($self) = @_; - - die "no tail pointer\n" if $self->{size} && ! $self->{tail}; - die "no head pointer\n" if $self->{size} && ! $self->{head}; - die "unwanted tail pointer\n" if ! $self->{size} && $self->{tail}; - die "unwanted head pointer\n" if ! $self->{size} && $self->{head}; - - my $iter = $self->{head}; - my $last = undef; - my $count = 1; - while ($count <= $self->{size}) { - if (! 
defined $iter) { - die "undefined iterator on element \#$count (trying to get to size $self->{size})\n"; - } - my $key = $iter->[KEY]; - my $it_via_hash = $self->{items}->{$key} or - die "item '$key' found in list, but not in hash\n"; - - unless ($it_via_hash == $iter) { - die "Hash value of '$key' maps to different node than we found.\n"; - } - - if ($count == 1 && $iter->[PREVREF]) { - die "Head element shouldn't have previous pointer!\n"; - } - if ($count == $self->{size} && $iter->[NEXTREF]) { - die "Last element shouldn't have next pointer!\n"; - } - if ($iter->[NEXTREF] && $iter->[NEXTREF]->[PREVREF] != $iter) { - die "next's previous should be us.\n"; - } - if ($last && $iter->[PREVREF] != $last) { - die "defined \$last but its previous isn't us.\n"; - } - if ($last && $last->[NEXTREF] != $iter) { - die "defined \$last but our next isn't it\n"; - } - if (!$last && $iter->[PREVREF]) { - die "uh, we have a nextref but shouldn't\n"; - } - - $last = $iter; - $iter = $iter->[NEXTREF]; - $count++; - } - return 1; -} - -sub drop_tail { - my Perlbal::Cache $self = shift; - die "no tail (size)" unless $self->{size}; - - ## who's going to die? - my $to_die = $self->{tail} or die "no tail (key)"; - - ## set the tail to the item before the one dying. - $self->{tail} = $self->{tail}->[PREVREF]; - - ## adjust the forward pointer on the tail to be undef - if (defined $self->{tail}) { - $self->{tail}->[NEXTREF] = undef; - } - - ## kill the item - delete $self->{items}->{$to_die->[KEY]}; - - ## shrink the overall size - $self->{size}--; - - if (!$self->{size}) { - $self->{head} = undef; - } -} - -sub get { - my Perlbal::Cache $self = shift; - my ($key) = @_; - - my $item = $self->{items}{$key} or - return undef; - - # promote this to the head - unless ($self->{head} == $item) { - if ($self->{tail} == $item) { - $self->{tail} = $item->[PREVREF]; - } - - # remove this element from the linked list. 
- my $next = $item->[NEXTREF]; - my $prev = $item->[PREVREF]; - if ($next) { $next->[PREVREF] = $prev; } - if ($prev) { $prev->[NEXTREF] = $next; } - - # make current head point backwards to this item - $self->{head}->[PREVREF] = $item; - - # make this item point forwards to current head, and backwards nowhere - $item->[NEXTREF] = $self->{head}; - $item->[PREVREF] = undef; - - # make this the new head - $self->{head} = $item; - } - - return $item->[VALUE]; -} - -sub remove { - my Perlbal::Cache $self = shift; - my ($key) = @_; - - my $item = $self->{items}{$key} or - return 0; - delete $self->{items}{$key}; - $self->{size}--; - - if (!$self->{size}) { - $self->{head} = undef; - $self->{tail} = undef; - return 1; - } - - if ($self->{head} == $item) { - $self->{head} = $item->[NEXTREF]; - $self->{head}->[PREVREF] = undef; - return 1; - } - if ($self->{tail} == $item) { - $self->{tail} = $item->[PREVREF]; - $self->{tail}->[NEXTREF] = undef; - return 1; - } - - # remove from middle - $item->[PREVREF]->[NEXTREF] = $item->[NEXTREF]; - $item->[NEXTREF]->[PREVREF] = $item->[PREVREF]; - return 1; - -} - -sub set { - my Perlbal::Cache $self = shift; - my ($key, $value) = @_; - - $self->drop_tail while - $self->{maxsize} && - $self->{size} >= $self->{maxsize} && - ! 
exists $self->{items}->{$key}; - - if (exists $self->{items}->{$key}) { - # update the value - my $it = $self->{items}->{$key}; - $it->[VALUE] = $value; - } else { - # stick it at the end, for now - my $it = $self->{items}->{$key} = []; - $it->[PREVREF] = undef; - $it->[NEXTREF] = undef; - $it->[KEY] = $key; - $it->[VALUE] = $value; - if ($self->{size}) { - $self->{tail}->[NEXTREF] = $it; - $it->[PREVREF] = $self->{tail}; - } else { - $self->{head} = $it; - } - $self->{tail} = $it; - $self->{size}++; - } - - # this will promote it to the top: - $self->get($key); -} - -1; diff --git a/lib/mogdeps/Perlbal/ChunkedUploadState.pm b/lib/mogdeps/Perlbal/ChunkedUploadState.pm deleted file mode 100644 index be1ec8a8..00000000 --- a/lib/mogdeps/Perlbal/ChunkedUploadState.pm +++ /dev/null @@ -1,64 +0,0 @@ -package Perlbal::ChunkedUploadState; -use strict; - -sub new { - my ($pkg, %args) = @_; - my $self = bless { - 'buf' => '', - 'bytes_remain' => 0, # remaining in chunk (ignoring final 2 byte CRLF) - }, $pkg; - foreach my $k (qw(on_new_chunk on_disconnect on_zero_chunk)) { - $self->{$k} = (delete $args{$k}) || sub {}; - } - die "bogus args" if %args; - return $self; -} - -sub on_readable { - my ($self, $ds) = @_; - my $rbuf = $ds->read(131072); - unless (defined $rbuf) { - $self->{on_disconnect}->(); - return; - } - - $self->{buf} .= $$rbuf; - - while ($self->drive_machine) {} -} - -# returns 1 if progress was made parsing buffer -sub drive_machine { - my $self = shift; - - my $buflen = length($self->{buf}); - return 0 unless $buflen; - - if (my $br = $self->{bytes_remain}) { - my $extract = $buflen > $br ? 
$br : $buflen; - my $ch = substr($self->{buf}, 0, $extract, ''); - $self->{bytes_remain} -= $extract; - die "assert" if $self->{bytes_remain} < 0; - $self->{on_new_chunk}->(\$ch); - return 1; - } - - return 0 unless $self->{buf} =~ s/^(?:\r\n)?([0-9a-fA-F]+)(?:;.*)?\r\n//; - $self->{bytes_remain} = hex($1); - - if ($self->{bytes_remain} == 0) { - # FIXME: new state machine state for trailer parsing/discarding. - # (before we do on_zero_chunk). for now, though, just assume - # no trailers and throw away the extra post-trailer \r\n that - # is probably in this packet. hacky. - $self->{buf} =~ s/^\r\n//; - $self->{hit_zero} = 1; - $self->{on_zero_chunk}->(); - return 0; - } - return 1; -} - -sub hit_zero_chunk { $_[0]{hit_zero} } - -1; diff --git a/lib/mogdeps/Perlbal/ClientHTTP.pm b/lib/mogdeps/Perlbal/ClientHTTP.pm deleted file mode 100644 index c4919a2d..00000000 --- a/lib/mogdeps/Perlbal/ClientHTTP.pm +++ /dev/null @@ -1,475 +0,0 @@ -###################################################################### -# HTTP Connection from a reverse proxy client. GET/HEAD only. -# most functionality is implemented in the base class. -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. 
-# - -package Perlbal::ClientHTTP; -use strict; -use warnings; -no warnings qw(deprecated); - -use base "Perlbal::ClientHTTPBase"; -use Perlbal::Util; - -use fields ('put_in_progress', # 1 when we're currently waiting for an async job to return - 'put_fh', # file handle to use for writing data - 'put_fh_filename', # filename of put_fh - 'put_pos', # file offset to write next data at - - 'content_length', # length of document being transferred - 'content_length_remain', # bytes remaining to be read - 'chunked_upload_state', # bool/obj: if processing a chunked upload, Perlbal::ChunkedUploadState object, else undef - ); - -use HTTP::Date (); -use File::Path; - -use Errno qw( EPIPE ); -use POSIX qw( O_CREAT O_TRUNC O_WRONLY O_RDONLY ENOENT ); - -# class list of directories we know exist -our (%VerifiedDirs); - -sub new { - my $class = shift; - - my $self = fields::new($class); - $self->SUPER::new(@_); - $self->init; - return $self; -} - -# upcasting a generic ClientHTTPBase (from a service selector) to a -# "full-fledged" ClientHTTP. -sub new_from_base { - my $class = shift; - my Perlbal::ClientHTTPBase $cb = shift; # base object - Perlbal::Util::rebless($cb, $class); - $cb->init; - - $cb->watch_read(1); # enable our reads, so we can get PUT/POST data - $cb->handle_request; # this will disable reads, if GET/HEAD/etc - return $cb; -} - -sub init { - my Perlbal::ClientHTTP $self = shift; - $self->{put_in_progress} = 0; - $self->{put_fh} = undef; - $self->{put_pos} = 0; - $self->{chunked_upload_state} = undef; -} - -sub close { - my Perlbal::ClientHTTP $self = shift; - - # don't close twice - return if $self->{closed}; - - $self->{put_fh} = undef; - $self->SUPER::close(@_); -} - -sub event_read { - my Perlbal::ClientHTTP $self = shift; - $self->{alive_time} = $Perlbal::tick_time; - - # see if we have headers? 
- if ($self->{req_headers}) { - if ($self->{req_headers}->request_method eq 'PUT') { - $self->event_read_put; - } else { - # since we have headers and we're not doing any special - # handling above, let's just disable read notification, because - # we won't do anything with the data - $self->watch_read(0); - } - return; - } - - # try and get the headers, if they're all here - my $hd = $self->read_request_headers - or return; - - $self->handle_request; -} - -# one-time routing of new request to the right handlers -sub handle_request { - my Perlbal::ClientHTTP $self = shift; - my $hd = $self->{req_headers}; - - $self->check_req_headers; - - # fully formed request received - $self->{requests}++; - - # notify that we're about to serve - return if $self->{service}->run_hook('start_web_request', $self); - return if $self->{service}->run_hook('start_http_request', $self); - - # GET/HEAD requests (local, from disk) - if ($hd->request_method eq 'GET' || $hd->request_method eq 'HEAD') { - # and once we have it, start serving - $self->watch_read(0); - return $self->_serve_request($hd); - } - - # PUT requests - return $self->handle_put if $hd->request_method eq 'PUT'; - - # DELETE requests - return $self->handle_delete if $hd->request_method eq 'DELETE'; - - # else, bad request - return $self->send_response(400); -} - -sub handle_put { - my Perlbal::ClientHTTP $self = shift; - my $hd = $self->{req_headers}; - - return $self->send_response(403) unless $self->{service}->{enable_put}; - - return if $self->handle_put_chunked; - - # they want to put something, so let's setup and wait for more reads - my $clen = - $self->{content_length} = - $self->{content_length_remain} = - $hd->header('Content-length') + 0; - - # return a 400 (bad request) if we got no content length or if it's - # bigger than any specified max put size - return $self->send_response(400, "Content-length of $clen is invalid.") - if ! 
defined($clen) || - $clen < 0 || - ($self->{service}->{max_put_size} && - $clen > $self->{service}->{max_put_size}); - - # if we are supposed to read data and have some data already from a header over-read, note it - if ($clen && defined $self->{read_ahead} && $self->{read_ahead} > 0) { - $self->{content_length_remain} -= $self->{read_ahead}; - } - - return if $self->{service}->run_hook('handle_put', $self); - - # error in filename? (any .. is an error) - my $uri = $self->{req_headers}->request_uri; - return $self->send_response(400, 'Invalid filename') - if $uri =~ /\.\./; - - # now we want to get the URI - return $self->send_response(400, 'Invalid filename') - unless $uri =~ m!^ - ((?:/[\w\-\.]+)*) # $1: zero+ path components of /FOO where foo is - # one+ conservative characters - / # path separator - ([\w\-\.]+) # $2: and the filename, one+ conservative characters - $!x; - - # sanitize uri into path and file into a disk path and filename - my ($path, $filename) = ($1 || '', $2); - - # the final action we'll be taking, eventually, is to start an async - # file open of the requested disk path. but we might need to verify - # the min_put_directory first. - my $start_open = sub { - my $disk_path = $self->{service}->{docroot} . '/' . $path; - $self->start_put_open($disk_path, $filename); - }; - - # verify minput if necessary - if ($self->{service}->{min_put_directory}) { - my @elems = grep { defined $_ && length $_ } split '/', $path; - return $self->send_response(400, 'Does not meet minimum directory requirement') - unless scalar(@elems) >= $self->{service}->{min_put_directory}; - my $req_path = '/' . join('/', splice(@elems, 0, $self->{service}->{min_put_directory})); - my $extra_path = '/' . 
join('/', @elems); - $self->validate_min_put_directory($req_path, $extra_path, $filename, $start_open); - } else { - $start_open->(); - } - - return; -} - -sub handle_put_chunked { - my Perlbal::ClientHTTP $self = shift; - my $req_hd = $self->{req_headers}; - my $te = $req_hd->header("Transfer-Encoding"); - return unless $te && $te eq "chunked"; - - my $eh = $req_hd->header("Expect"); - if ($eh && $eh =~ /\b100-continue\b/) { - $self->write(\ "HTTP/1.1 100 Continue\r\n\r\n"); - } - - my $max_size = $self->{service}{max_chunked_request_size}; - - # error in filename? (any .. is an error) - my $uri = $self->{req_headers}->request_uri; - return $self->send_response(400, 'Invalid filename') - if $uri =~ /\.\./; - - # now we want to get the URI - return $self->send_response(400, 'Invalid filename') - unless $uri =~ m!^ - ((?:/[\w\-\.]+)*) # $1: zero+ path components of /FOO where foo is - # one+ conservative characters - / # path separator - ([\w\-\.]+) # $2: and the filename, one+ conservative characters - $!x; - - # sanitize uri into path and file into a disk path and filename - my ($path, $filename) = ($1 || '', $2); - - my $disk_path = $self->{service}->{docroot} . '/' . $path; - - $self->{chunked_upload_state} = Perlbal::ChunkedUploadState->new(%{{ - on_new_chunk => sub { - my $cref = shift; - my $len = length($$cref); - push @{$self->{read_buf}}, $cref; - - $self->{read_ahead} += $len; - $self->{content_length} += $len; - - # if too large, disconnect them... - if ($max_size && $self->{content_length} > $max_size) { - # TODO: delete file at this point? we're disconnecting them - # to prevent them from writing more, but do we care to keep - # what they already wrote? - $self->close; - return; - } - - $self->put_writeout if $self->{read_ahead} >= 8192; # arbitrary - }, - on_disconnect => sub { - warn "Disconnect during chunked PUT.\n"; - - # TODO: do we unlink the file here, since it wasn't a proper close - # ending in a zero-length chunk? perhaps a config option? 
for - # now we'll just leave it on disk with what we've got so far: - $self->close('remote_closure_during_chunked_put'); - }, - on_zero_chunk => sub { - $self->{chunked_upload_state} = undef; - $self->watch_read(0); - - # kick off any necessary aio writes: - $self->put_writeout; - # this will do nothing, if a put is already in progress: - $self->put_close; - }, - }}); - - $self->start_put_open($disk_path, $filename); - - return 1; -} - -# called when we're requested to do a delete -sub handle_delete { - my Perlbal::ClientHTTP $self = shift; - - return $self->send_response(403) unless $self->{service}->{enable_delete}; - - $self->watch_read(0); - - # error in filename? (any .. is an error) - my $uri = $self->{req_headers}->request_uri; - return $self->send_response(400, 'Invalid filename') - if $uri =~ /\.\./; - - # now we want to get the URI - if ($uri =~ m!^(?:/[\w\-\.]+)+$!) { - # now attempt the unlink - Perlbal::AIO::aio_unlink($self->{service}->{docroot} . '/' . $uri, sub { - my $err = shift; - if ($err == 0 && !$!) { - # delete was successful - return $self->send_response(204); - } elsif ($! == ENOENT) { - # no such file - return $self->send_response(404); - } else { - # failure... 
- return $self->send_response(400, "$!"); - } - }); - } else { - # bad URI, don't accept the delete - return $self->send_response(400, 'Invalid filename'); - } -} - -sub event_read_put { - my Perlbal::ClientHTTP $self = shift; - - if (my $cus = $self->{chunked_upload_state}) { - $cus->on_readable($self); - return; - } - - # read in data and shove it on the read buffer - my $dataref = $self->read($self->{content_length_remain}); - - # unless they disconnected prematurely - unless (defined $dataref) { - $self->close('remote_closure'); - return; - } - - # got some data - push @{$self->{read_buf}}, $dataref; - my $clen = length($$dataref); - $self->{read_size} += $clen; - $self->{read_ahead} += $clen; - $self->{content_length_remain} -= $clen; - - if ($self->{content_length_remain}) { - $self->put_writeout if $self->{read_ahead} >= 8192; # arbitrary - } else { - # now, if we've filled the content of this put, we're done - $self->watch_read(0); - $self->put_writeout; - } -} - -# verify that a minimum put directory exists. if/when it's verified, -# perhaps cached, the provided callback will be run. -sub validate_min_put_directory { - my Perlbal::ClientHTTP $self = shift; - my ($req_path, $extra_path, $filename, $callback) = @_; - - my $disk_dir = $self->{service}->{docroot} . '/' . 
$req_path; - return $callback->() if $VerifiedDirs{$disk_dir}; - - $self->{put_in_progress} = 1; - - Perlbal::AIO::aio_open($disk_dir, O_RDONLY, 0755, sub { - my $fh = shift; - $self->{put_in_progress} = 0; - - # if error return failure - return $self->send_response(404, "Base directory does not exist") unless $fh; - CORE::close($fh); - - # mindir existed, mark it as so and start the open for the rest of the path - $VerifiedDirs{$disk_dir} = 1; - $callback->(); - }); -} - -# attempt to open a file being PUT for writing to disk -sub start_put_open { - my Perlbal::ClientHTTP $self = shift; - my ($path, $file) = @_; - - $self->{put_in_progress} = 1; - - Perlbal::AIO::aio_open("$path/$file", O_CREAT | O_TRUNC | O_WRONLY, 0644, sub { - # get the fd - my $fh = shift; - - # verify file was opened - $self->{put_in_progress} = 0; - - if (! $fh) { - if ($! == ENOENT) { - # directory doesn't exist, so let's manually create it - eval { File::Path::mkpath($path, 0, 0755); }; - return $self->system_error("Unable to create directory", "path = $path, file = $file") if $@; - - # should be created, call self recursively to try - return $self->start_put_open($path, $file); - } else { - return $self->system_error("Internal error", "error = $!, path = $path, file = $file"); - } - } - - $self->{put_fh} = $fh; - $self->{put_pos} = 0; - $self->{put_fh_filename} = "$path/$file"; - - # We just opened the file, haven't read_ahead any bytes, are expecting 0 bytes for read and we're - # not in chunked mode, so close the file immediately, we're done. 
- unless ($self->{read_ahead} || $self->{content_length_remain} || $self->{chunked_upload_state}) { - # FIXME this should be done through AIO - $self->put_close; - return; - } - - $self->put_writeout; - }); -} - -# called when we've got some put data to write out -sub put_writeout { - my Perlbal::ClientHTTP $self = shift; - Carp::confess("wrong class for $self") unless ref $self eq "Perlbal::ClientHTTP"; - - return if $self->{service}->run_hook('put_writeout', $self); - return if $self->{put_in_progress}; - return unless $self->{put_fh}; - return unless $self->{read_ahead}; - - my $data = join("", map { $$_ } @{$self->{read_buf}}); - my $count = length $data; - - # reset our input buffer - $self->{read_buf} = []; - $self->{read_ahead} = 0; - - # okay, file is open, write some data - $self->{put_in_progress} = 1; - - Perlbal::AIO::set_file_for_channel($self->{put_fh_filename}); - Perlbal::AIO::aio_write($self->{put_fh}, $self->{put_pos}, $count, $data, sub { - return if $self->{closed}; - - # see how many bytes written - my $bytes = shift() + 0; - - $self->{put_pos} += $bytes; - $self->{put_in_progress} = 0; - - # now recursively call ourselves? - if ($self->{read_ahead}) { - $self->put_writeout; - return; - } - - return if $self->{content_length_remain} || $self->{chunked_upload_state}; - - # we're done putting this file, so close it. 
- # FIXME this should be done through AIO - $self->put_close; - }); -} - -sub put_close { - my Perlbal::ClientHTTP $self = shift; - return if $self->{put_in_progress}; - return unless $self->{put_fh}; - - if (CORE::close($self->{put_fh})) { - $self->{put_fh} = undef; - return $self->send_response(200); - } else { - return $self->system_error("Error saving file", "error in close: $!"); - } -} - -1; - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/ClientHTTPBase.pm b/lib/mogdeps/Perlbal/ClientHTTPBase.pm deleted file mode 100644 index 80d64cfc..00000000 --- a/lib/mogdeps/Perlbal/ClientHTTPBase.pm +++ /dev/null @@ -1,905 +0,0 @@ -###################################################################### -# Common HTTP functionality for ClientProxy and ClientHTTP -# possible states: -# reading_headers (initial state, then follows one of two paths) -# wait_backend, backend_req_sent, wait_res, xfer_res, draining_res -# wait_stat, wait_open, xfer_disk -# both paths can then go into persist_wait, which means they're waiting -# for another request from the user -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. - -package Perlbal::ClientHTTPBase; -use strict; -use warnings; -no warnings qw(deprecated); - -use Sys::Syscall; -use base "Perlbal::Socket"; -use HTTP::Date (); -use fields ('service', # Perlbal::Service object - 'replacement_uri', # URI to send instead of the one requested; this is used - # to instruct _serve_request to send an index file instead - # of trying to serve a directory and failing - 'scratch', # extra storage; plugins can use it if they want - - # reproxy support - 'reproxy_file', # filename the backend told us to start opening - 'reproxy_file_size', # size of file, once we stat() it - 'reproxy_fh', # if needed, IO::Handle of fd - 'reproxy_file_offset', # how much we've sent from the file. 
- - 'post_sendfile_cb', # subref to run after we're done sendfile'ing the current file - - 'requests', # number of requests this object has performed for the user - - # service selector parent - 'selector_svc', # the original service from which we came - ); - -use Fcntl ':mode'; -use Errno qw(EPIPE ECONNRESET); -use POSIX (); - -# hard-code defaults can be changed with MIME management command -our $MimeType = {qw( - css text/css - doc application/msword - gif image/gif - htm text/html - html text/html - jpg image/jpeg - js application/x-javascript - mp3 audio/mpeg - mpg video/mpeg - pdf application/pdf - png image/png - tif image/tiff - tiff image/tiff - torrent application/x-bittorrent - txt text/plain - zip application/zip -)}; - -# ClientHTTPBase -sub new { - - my Perlbal::ClientHTTPBase $self = shift; - my ($service, $sock, $selector_svc) = @_; - $self = fields::new($self) unless ref $self; - $self->SUPER::new($sock); # init base fields - - $self->{service} = $service; - $self->{replacement_uri} = undef; - $self->{headers_string} = ''; - $self->{requests} = 0; - $self->{scratch} = {}; - $self->{selector_svc} = $selector_svc; - - $self->state('reading_headers'); - - $self->watch_read(1); - return $self; -} - -sub close { - my Perlbal::ClientHTTPBase $self = shift; - - # don't close twice - return if $self->{closed}; - - # could contain a closure with circular reference - $self->{post_sendfile_cb} = undef; - - # close the file we were reproxying, if any - CORE::close($self->{reproxy_fh}) if $self->{reproxy_fh}; - - # now pass up the line - $self->SUPER::close(@_); -} - -# given the response headers we just got, and considering our request -# headers, determine if we should be sending keep-alive header -# information back to the client -sub setup_keepalive { - my Perlbal::ClientHTTPBase $self = $_[0]; - print "ClientHTTPBase::setup_keepalive($self)\n" if Perlbal::DEBUG >= 2; - - # now get the headers we're using - my Perlbal::HTTPHeaders $reshd = $_[1]; - my 
Perlbal::HTTPHeaders $rqhd = $self->{req_headers}; - - # for now, we enforce outgoing HTTP 1.0 - $reshd->set_version("1.0"); - - # if we came in via a selector service, that's whose settings - # we respect for persist_client - my $svc = $self->{selector_svc} || $self->{service}; - my $persist_client = $svc->{persist_client} || 0; - print " service's persist_client = $persist_client\n" if Perlbal::DEBUG >= 3; - - # do keep alive if they sent content-length or it's a head request - my $do_keepalive = $persist_client && $rqhd->req_keep_alive($reshd); - if ($do_keepalive) { - print " doing keep-alive to client\n" if Perlbal::DEBUG >= 3; - my $timeout = $self->{service}->{persist_client_timeout}; - $reshd->header('Connection', 'keep-alive'); - $reshd->header('Keep-Alive', $timeout ? "timeout=$timeout, max=100" : undef); - } else { - print " doing connection: close\n" if Perlbal::DEBUG >= 3; - # FIXME: we don't necessarily want to set connection to close, - # but really set a space-separated list of tokens which are - # specific to the connection. "close" and "keep-alive" are - # just special. - $reshd->header('Connection', 'close'); - $reshd->header('Keep-Alive', undef); - } -} - -# overridden here from Perlbal::Socket to use the service value -sub max_idle_time { - return $_[0]->{service}->{persist_client_timeout}; -} - -# Called when this client is entering a persist_wait state, but before we are returned to base. -sub persist_wait { - -} - -# called when we've finished writing everything to a client and we need -# to reset our state for another request. returns 1 to mean that we should -# support persistence, 0 means we're discarding this connection. -sub http_response_sent { - my Perlbal::ClientHTTPBase $self = $_[0]; - - # close if we're supposed to - if ( - ! defined $self->{res_headers} || - ! $self->{res_headers}->res_keep_alive($self->{req_headers}) || - $self->{do_die} - ) - { - # do a final read so we don't have unread_data_waiting and RST - # the connection. 
IE and others send an extra \r\n after POSTs - my $dummy = $self->read(5); - - # close if we have no response headers or they say to close - $self->close("no_keep_alive"); - return 0; - } - - # if they just did a POST, set the flag that says we might expect - # an unadvertised \r\n coming from some browsers. Old Netscape - # 4.x did this on all POSTs, and Firefox/Safari do it on - # XmlHttpRequest POSTs. - if ($self->{req_headers}->request_method eq "POST") { - $self->{ditch_leading_rn} = 1; - } - - # now since we're doing persistence, uncork so the last packet goes. - # we will recork when we're processing a new request. - $self->tcp_cork(0); - - # reset state - $self->{replacement_uri} = undef; - $self->{headers_string} = ''; - $self->{req_headers} = undef; - $self->{res_headers} = undef; - $self->{reproxy_fh} = undef; - $self->{reproxy_file} = undef; - $self->{reproxy_file_size} = 0; - $self->{reproxy_file_offset} = 0; - $self->{read_buf} = []; - $self->{read_ahead} = 0; - $self->{read_size} = 0; - $self->{scratch} = {}; - $self->{post_sendfile_cb} = undef; - $self->state('persist_wait'); - - $self->persist_wait; - - if (my $selector_svc = $self->{selector_svc}) { - if (! $selector_svc->run_hook('return_to_base', $self)){ - $selector_svc->return_to_base($self); - } - } - - # NOTE: because we only speak 1.0 to clients they can't have - # pipeline in a read that we haven't read yet. 
- $self->watch_read(1); - $self->watch_write(0); - return 1; -} - -sub reproxy_fh { - my Perlbal::ClientHTTPBase $self = shift; - - # setter - if (@_) { - my ($fh, $size) = @_; - $self->state('xfer_disk'); - $self->{reproxy_fh} = $fh; - $self->{reproxy_file_offset} = 0; - $self->{reproxy_file_size} = $size; - - my $is_ssl_webserver = ( $self->{service}->{listener}->{sslopts} && - ( $self->{service}->{role} eq 'web_server') ); - - unless ($is_ssl_webserver) { - # call hook that we're reproxying a file - return $fh if $self->{service}->run_hook("start_send_file", $self); - # turn on writes (the hook might not have wanted us to) - $self->watch_write(1); - return $fh; - } else { # use aio_read for ssl webserver instead of sendfile - - print "webserver in ssl mode, sendfile disabled!\n" - if $Perlbal::DEBUG >= 3; - - # turn off writes - $self->watch_write(0); - #create filehandle for reading - my $data = ''; - Perlbal::AIO::aio_read($self->reproxy_fh, 0, 2048, $data, sub { - # got data? undef is error - return $self->_simple_response(500) unless $_[0] > 0; - - # seek into the file now so sendfile starts further in - my $ld = length $data; - sysseek($self->{reproxy_fh}, $ld, &POSIX::SEEK_SET); - $self->{reproxy_file_offset} = $ld; - # reenable writes after we get data - $self->tcp_cork(1); # by setting reproxy_file_offset above, - # it won't cork, so we cork it - $self->write($data); - $self->watch_write(1); - }); - return 1; - } - } - - return $self->{reproxy_fh}; -} - -sub event_read { - my Perlbal::ClientHTTPBase $self = shift; - - $self->{alive_time} = $Perlbal::tick_time; - - # see if we have headers? - die "Shouldn't get here! This is an abstract base class, pretty much, except in the case of the 'selector' role." 
- if $self->{req_headers}; - - my $hd = $self->read_request_headers; - return unless $hd; - - return if $self->{service}->run_hook('start_http_request', $self); - - # we must stop watching for events now, otherwise if there's - # PUT/POST overflow, it'll be sent to ClientHTTPBase, which can't - # handle it yet. must wait for the selector (which has as much - # time as it wants) to route as to our subclass, which can then - # re-enable reads. - $self->watch_read(0); - - my $select = sub { - # now that we have headers, it's time to tell the selector - # plugin that it's time for it to select which real service to - # use - my $selector = $self->{'service'}->selector(); - return $self->_simple_response(500, "No service selector configured.") - unless ref $selector eq "CODE"; - $selector->($self); - }; - - my $svc = $self->{'service'}; - if ($svc->{latency}) { - Danga::Socket->AddTimer($svc->{latency} / 1000, $select); - } else { - $select->(); - } -} - -sub reproxy_file_done { - my Perlbal::ClientHTTPBase $self = shift; - return if $self->{service}->run_hook('reproxy_fh_finished', $self); - # close the sendfile fd - CORE::close($self->{reproxy_fh}); - $self->{reproxy_fh} = undef; - if (my $cb = $self->{post_sendfile_cb}) { - $cb->(); - } else { - $self->http_response_sent; - } -} - -# client is ready for more of its file. so sendfile some more to it. 
-# (called by event_write when we're actually in this mode) -sub event_write_reproxy_fh { - my Perlbal::ClientHTTPBase $self = shift; - - my $remain = $self->{reproxy_file_size} - $self->{reproxy_file_offset}; - $self->tcp_cork(1) if $self->{reproxy_file_offset} == 0; - $self->watch_write(0); - - if ($self->{service}->{listener}->{sslopts}) { # SSL (sendfile does not do SSL) - return if $self->{closed}; - if ($remain <= 0) { #done - print "REPROXY SSL done\n" if Perlbal::DEBUG >= 2; - $self->reproxy_file_done; - return; - } - # queue up next read - Perlbal::AIO::set_file_for_channel($self->{reproxy_file}); - my $len = $remain > 4096 ? 4096 : $remain; # buffer size - my $buffer = ''; - Perlbal::AIO::aio_read( - $self->{reproxy_fh}, - $self->{reproxy_file_offset}, - $len, - $buffer, - sub { - return if $self->{closed}; - # we have buffer to send - my $rv = $_[0]; # arg is result of sysread - if (!defined($rv) || $rv <= 0) { # read error - # sysseek is called after sysread so $! not valid - $self->close('sysread_error'); - print STDERR "Error w/ reproxy sysread\n"; - return; - } - $self->{reproxy_file_offset} += $rv; - $self->tcp_cork(1); # by setting reproxy_file_offset above, - # it won't cork, so we cork it - $self->write($buffer); # start socket send - $self->watch_write(1); - } - ); - return; - } - - # cap at 128k sendfiles - my $to_send = $remain > 128 * 1024 ? 128 * 1024 : $remain; - - my $postread = sub { - return if $self->{closed}; - - my $sent = Perlbal::Socket::sendfile($self->{fd}, - fileno($self->{reproxy_fh}), - $to_send); - #warn "to_send = $to_send, sent = $sent\n"; - print "REPROXY Sent: $sent\n" if Perlbal::DEBUG >= 2; - - if ($sent < 0) { - return $self->close("epipe") if $! == EPIPE; - return $self->close("connreset") if $! 
== ECONNRESET; - print STDERR "Error w/ sendfile: $!\n"; - $self->close('sendfile_error'); - return; - } - $self->{reproxy_file_offset} += $sent; - - if ($sent >= $remain) { - $self->reproxy_file_done; - } else { - $self->watch_write(1); - } - }; - - # TODO: way to bypass readahead and go straight to sendfile for common/hot/recent files. - # something like: - # if ($hot) { $postread->(); return ; } - - if ($to_send < 0) { - Perlbal::log('warning', "tried to readahead negative bytes. filesize=$self->{reproxy_file_size}, offset=$self->{reproxy_file_offset}"); - # this code, doing sendfile, will fail gracefully with return - # code, not 'die', and we'll close with sendfile_error: - $postread->(); - return; - } - - Perlbal::AIO::set_file_for_channel($self->{reproxy_file}); - Perlbal::AIO::aio_readahead($self->{reproxy_fh}, - $self->{reproxy_file_offset}, - $to_send, $postread); -} - -sub event_write { - my Perlbal::ClientHTTPBase $self = shift; - - # Any HTTP client is considered alive if it's writable. - # if it's not writable for 30 seconds, we kill it. - # subclasses can decide what's appropriate for timeout. - $self->{alive_time} = $Perlbal::tick_time; - - # if we're sending a filehandle, go do some more sendfile: - if ($self->{reproxy_fh}) { - $self->event_write_reproxy_fh; - return; - } - - # otherwise just kick-start our write buffer. - if ($self->write(undef)) { - # we've written all data in the queue, so stop waiting for - # write notifications: - print "All writing done to $self\n" if Perlbal::DEBUG >= 2; - $self->watch_write(0); - } -} - -# this gets called when a "web" service is serving a file locally. 
-sub _serve_request { - my Perlbal::ClientHTTPBase $self = shift; - my Perlbal::HTTPHeaders $hd = shift; - - my $rm = $hd->request_method; - unless ($rm eq "HEAD" || $rm eq "GET") { - return $self->_simple_response(403, "Unimplemented method"); - } - - my $uri = Perlbal::Util::durl($self->{replacement_uri} || $hd->request_uri); - my Perlbal::Service $svc = $self->{service}; - - # start_serve_request hook - return 1 if $self->{service}->run_hook('start_serve_request', $self, \$uri); - - # don't allow directory traversal - if ($uri =~ m!/\.\./! || $uri !~ m!^/!) { - return $self->_simple_response(403, "Bogus URL"); - } - - # double question mark means to serve multiple files, comma separated after the - # questions. the uri part before the question mark is the relative base directory - # TODO: only do this if $uri has ?? and the service also allows it. otherwise - # we don't want to mess with anybody's meaning of '??' on the backend service - return $self->_serve_request_multiple($hd, $uri) if $uri =~ /\?\?/; - - # chop off the query string - $uri =~ s/\?.*//; - - return $self->_simple_response(500, "Docroot unconfigured") - unless $svc->{docroot}; - - my $file = $svc->{docroot} . $uri; - - # update state, since we're now waiting on stat - $self->state('wait_stat'); - - Perlbal::AIO::aio_stat($file, sub { - # client's gone anyway - return if $self->{closed}; - unless (-e _) { - return if $self->{service}->run_hook('static_get_poststat_file_missing', $self); - return $self->_simple_response(404); - } - - my $mtime = (stat(_))[9]; - my $lastmod = HTTP::Date::time2str($mtime); - my $ims = $hd->header("If-Modified-Since") || ""; - - # IE sends a request header like "If-Modified-Since: ; length=" - # so we have to remove the length bit before comparing it with our date. - # then we save the length to compare later. 
- my $ims_len; - if ($ims && $ims =~ s/; length=(\d+)//) { - $ims_len = $1; - } - - my $not_mod = $ims eq $lastmod && -f _; - - my $res; - my $not_satisfiable = 0; - my $size = -s _ if -f _; - - # extra protection for IE, since it's offering the info anyway. (see above) - $not_mod = 0 if $ims_len && $ims_len != $size; - - my ($status, $range_start, $range_end) = $hd->range($size); - - if ($not_mod) { - $res = $self->{res_headers} = Perlbal::HTTPHeaders->new_response(304); - } elsif ($status == 416) { - $res = $self->{res_headers} = Perlbal::HTTPHeaders->new_response(416); - $res->header("Content-Range", $size ? "bytes */$size" : "*"); - $res->header("Content-Length", 0); - $not_satisfiable = 1; - } elsif ($status == 206) { - # partial content - $res = $self->{res_headers} = Perlbal::HTTPHeaders->new_response(206); - } else { - return if $self->{service}->run_hook('static_get_poststat_pre_send', $self, $mtime); - $res = $self->{res_headers} = Perlbal::HTTPHeaders->new_response(200); - } - - # now set whether this is keep-alive or not - $res->header("Date", HTTP::Date::time2str()); - $res->header("Server", "Perlbal"); - $res->header("Last-Modified", $lastmod); - - if (-f _) { - # advertise that we support byte range requests - $res->header("Accept-Ranges", "bytes"); - - unless ($not_mod || $not_satisfiable) { - my ($ext) = ($file =~ /\.(\w+)$/); - $res->header("Content-Type", - (defined $ext && exists $MimeType->{$ext}) ? 
$MimeType->{$ext} : "text/plain"); - - unless ($status == 206) { - $res->header("Content-Length", $size); - } else { - $res->header("Content-Range", "bytes $range_start-$range_end/$size"); - $res->header("Content-Length", $range_end - $range_start + 1); - } - } - - # has to happen after content-length is set to work: - $self->setup_keepalive($res); - - return if $self->{service}->run_hook('modify_response_headers', $self); - - if ($rm eq "HEAD" || $not_mod || $not_satisfiable) { - # we can return already, since we know the size - $self->tcp_cork(1); - $self->state('xfer_resp'); - $self->write($res->to_string_ref); - $self->write(sub { $self->http_response_sent; }); - return; - } - - # state update - $self->state('wait_open'); - - Perlbal::AIO::aio_open($file, 0, 0, sub { - my $rp_fh = shift; - - # if client's gone, just close filehandle and abort - if ($self->{closed}) { - CORE::close($rp_fh) if $rp_fh; - return; - } - - # handle errors - if (! $rp_fh) { - # couldn't open the file we had already successfully stat'ed. - # FIXME: do 500 vs. 404 vs whatever based on $! - return $self->close('aio_open_failure'); - } - - $self->state('xfer_disk'); - $self->tcp_cork(1); # cork writes to self - $self->write($res->to_string_ref); - - # seek if partial content - if ($status == 206) { - sysseek($rp_fh, $range_start, &POSIX::SEEK_SET); - $size = $range_end - $range_start + 1; - } - - $self->{reproxy_file} = $file; - $self->reproxy_fh($rp_fh, $size); - }); - - } elsif (-d _) { - $self->try_index_files($hd, $res, $uri); - } - }); -} - -sub _serve_request_multiple { - my Perlbal::ClientHTTPBase $self = shift; - my ($hd, $uri) = @_; - - my @multiple_files; - my %statinfo; # file -> [ stat fields ] - - # double question mark means to serve multiple files, comma - # separated after the questions. the uri part before the question - # mark is the relative base directory - my ($base, $list) = ($uri =~ /(.+)\?\?(.+)/); - - unless ($base =~ m!/$!) 
{ - return $self->_simple_response(500, "Base directory (before ??) must end in slash.") - } - - # and remove any trailing ?.+ on the list, so you can do things like cache busting - # with a ?v= at the end of a list of files. - $list =~ s/\?.+//; - - my Perlbal::Service $svc = $self->{service}; - return $self->_simple_response(500, "Docroot unconfigured") - unless $svc->{docroot}; - - @multiple_files = split(/,/, $list); - - return $self->_simple_response(403, "Multiple file serving isn't enabled") unless $svc->{enable_concatenate_get}; - return $self->_simple_response(403, "Too many files requested") if @multiple_files > 100; - return $self->_simple_response(403, "Bogus filenames") if grep { m!(?:\A|/)\.\./! } @multiple_files; - - my $remain = @multiple_files + 1; # 1 for the base directory - my $dirbase = $svc->{docroot} . $base; - foreach my $file ('', @multiple_files) { - Perlbal::AIO::aio_stat("$dirbase$file", sub { - $remain--; - $statinfo{$file} = $! ? [] : [ stat(_) ]; - return if $remain || $self->{closed}; - $self->_serve_request_multiple_poststat($hd, $dirbase, \@multiple_files, \%statinfo); - }); - } -} - -sub _serve_request_multiple_poststat { - my Perlbal::ClientHTTPBase $self = shift; - my ($hd, $basedir, $filelist, $stats) = @_; - - # base directory must be a directory - unless (S_ISDIR($stats->{''}[2] || 0)) { - return $self->_simple_response(404, "Base directory not a directory"); - } - - # files must all exist - my $sum_length = 0; - my $most_recent_mod = 0; - my $mime; # undef until set, or defaults to text/plain later - foreach my $f (@$filelist) { - my $stat = $stats->{$f}; - unless (S_ISREG($stat->[2] || 0)) { - return if $self->{service}->run_hook('concat_get_poststat_file_missing', $self); - return $self->_simple_response(404, "One or more file does not exist"); - } - if (!$mime && $f =~ /\.(\w+)$/ && $MimeType->{$1}) { - $mime = $MimeType->{$1}; - } - $sum_length += $stat->[7]; - $most_recent_mod = $stat->[9] if - $stat->[9] 
>$most_recent_mod; - } - $mime ||= 'text/plain'; - - my $lastmod = HTTP::Date::time2str($most_recent_mod); - my $ims = $hd->header("If-Modified-Since") || ""; - - # IE sends a request header like "If-Modified-Since: ; length=" - # so we have to remove the length bit before comparing it with our date. - # then we save the length to compare later. - my $ims_len; - if ($ims && $ims =~ s/; length=(\d+)//) { - $ims_len = $1; - } - - # What is -f _ doing here? don't we detect the existence of all files above in the loop? - my $not_mod = $ims eq $lastmod && -f _; - - my $res; - if ($not_mod) { - $res = $self->{res_headers} = Perlbal::HTTPHeaders->new_response(304); - } else { - return if $self->{service}->run_hook('concat_get_poststat_pre_send', $self, $most_recent_mod); - $res = $self->{res_headers} = Perlbal::HTTPHeaders->new_response(200); - $res->header("Content-Length", $sum_length); - } - - $res->header("Date", HTTP::Date::time2str()); - $res->header("Server", "Perlbal"); - $res->header("Last-Modified", $lastmod); - $res->header("Content-Type", $mime); - # has to happen after content-length is set to work: - $self->setup_keepalive($res); - return if $self->{service}->run_hook('modify_response_headers', $self); - - if ($hd->request_method eq "HEAD" || $not_mod) { - # we can return already, since we know the size - $self->tcp_cork(1); - $self->state('xfer_resp'); - $self->write($res->to_string_ref); - $self->write(sub { $self->http_response_sent; }); - return; - } - - $self->tcp_cork(1); # cork writes to self - $self->write($res->to_string_ref); - $self->state('wait_open'); - - # gotta send all files, one by one... 
- my @remain = @$filelist; - $self->{post_sendfile_cb} = sub { - unless (@remain) { - $self->write(sub { $self->http_response_sent; }); - return; - } - - my $file = shift @remain; - my $fullfile = "$basedir$file"; - my $size = $stats->{$file}[7]; - - Perlbal::AIO::aio_open($fullfile, 0, 0, sub { - my $rp_fh = shift; - - # if client's gone, just close filehandle and abort - if ($self->{closed}) { - CORE::close($rp_fh) if $rp_fh; - return; - } - - # handle errors - if (! $rp_fh) { - # couldn't open the file we had already successfully stat'ed. - # FIXME: do 500 vs. 404 vs whatever based on $! - return $self->close('aio_open_failure'); - } - - $self->{reproxy_file} = $file; - $self->reproxy_fh($rp_fh, $size); - }); - }; - $self->{post_sendfile_cb}->(); -} - -sub check_req_headers { - my Perlbal::ClientHTTPBase $self = shift; - my Perlbal::HTTPHeaders $hds = $self->{req_headers}; - - if ($self->{service}->trusted_ip($self->peer_ip_string)) { - my @ips = split /,\s*/, ($hds->header("X-Forwarded-For") || ''); - - # This list may be empty, and that's OK, in that case we should unset the - # observed_ip_string, so no matter what we'll use the 0th element, whether - # it happens to be an ip string, or undef. - $self->observed_ip_string($ips[0]); - } - - return; -} - -sub try_index_files { - my Perlbal::ClientHTTPBase $self = shift; - my ($hd, $res, $uri, $filepos) = @_; - - # make sure this starts at 0 initially, and fail if it's past the end - $filepos ||= 0; - if ($filepos >= scalar(@{$self->{service}->{index_files} || []})) { - unless ($self->{service}->{dirindexing}) { - # just inform them that listing is disabled - $self->_simple_response(200, "Directory listing disabled"); - return; - } - - # ensure uri has one and only one trailing slash for better URLs - $uri =~ s!/*$!/!; - - # open the directory and create an index - my $body = ""; - my $file = $self->{service}->{docroot} . 
$uri; - - $res->header("Content-Type", "text/html"); - opendir(D, $file); - foreach my $de (sort readdir(D)) { - if (-d "$file/$de") { - $body .= "$de
\n"; - } else { - $body .= "$de
\n"; - } - } - closedir(D); - - $body .= ""; - $res->header("Content-Length", length($body)); - $self->setup_keepalive($res); - - $self->state('xfer_resp'); - $self->tcp_cork(1); # cork writes to self - $self->write($res->to_string_ref); - $self->write(\$body); - $self->write(sub { $self->http_response_sent; }); - return; - } - - # construct the file path we need to check - my $file = $self->{service}->{index_files}->[$filepos]; - my $fullpath = $self->{service}->{docroot} . $uri . '/' . $file; - - # now see if it exists - Perlbal::AIO::aio_stat($fullpath, sub { - return if $self->{closed}; - return $self->try_index_files($hd, $res, $uri, $filepos + 1) unless -f _; - - # at this point the file exists, so we just want to serve it - $self->{replacement_uri} = $uri . '/' . $file; - return $self->_serve_request($hd); - }); -} - -sub _simple_response { - my Perlbal::ClientHTTPBase $self = shift; - my ($code, $msg) = @_; # or bodyref - - my $res = $self->{res_headers} = Perlbal::HTTPHeaders->new_response($code); - - my $body; - if ($code != 204 && $code != 304) { - $res->header("Content-Type", "text/html"); - my $en = $res->http_code_english; - $body = "

$code" . ($en ? " - $en" : "") . "

\n"; - $body .= $msg if $msg; - $res->header('Content-Length', length($body)); - } - - $res->header('Server', 'Perlbal'); - - $self->setup_keepalive($res); - - $self->state('xfer_resp'); - $self->tcp_cork(1); # cork writes to self - $self->write($res->to_string_ref); - if (defined $body) { - unless ($self->{req_headers} && $self->{req_headers}->request_method eq 'HEAD') { - # don't write body for head requests - $self->write(\$body); - } - } - $self->write(sub { $self->http_response_sent; }); - return 1; -} - - -sub send_response { - my Perlbal::ClientHTTPBase $self = shift; - - $self->watch_read(0); - $self->watch_write(1); - return $self->_simple_response(@_); -} - -# method that sends a 500 to the user but logs it and any extra information -# we have about the error in question -sub system_error { - my Perlbal::ClientHTTPBase $self = shift; - my ($msg, $info) = @_; - - # log to syslog - Perlbal::log('warning', "system error: $msg ($info)"); - - # and return a 500 - return $self->send_response(500, $msg); -} - -sub event_err { my $self = shift; $self->close('error'); } -sub event_hup { my $self = shift; $self->close('hup'); } - -sub _sock_port { - my $name = $_[0]; - my $port = eval { (Socket::sockaddr_in($name))[0] }; - return $port unless $@; - # fallback to IPv6: - return (Socket6::unpack_sockaddr_in($name))[0]; -} - -sub as_string { - my Perlbal::ClientHTTPBase $self = shift; - - my $ret = $self->SUPER::as_string; - my $name = $self->{sock} ? getsockname($self->{sock}) : undef; - my $lport = $name ? _sock_port($name) : undef; - my $observed = $self->observed_ip_string; - $ret .= ": localport=$lport" if $lport; - $ret .= "; observed_ip=$observed" if defined $observed; - $ret .= "; reqs=$self->{requests}"; - $ret .= "; $self->{state}"; - - my $hd = $self->{req_headers}; - if (defined $hd) { - my $host = $hd->header('Host') || 'unknown'; - $ret .= "; http://$host" . 
$hd->request_uri; - } - - return $ret; -} - -1; - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/ClientManage.pm b/lib/mogdeps/Perlbal/ClientManage.pm deleted file mode 100644 index 5eee3c72..00000000 --- a/lib/mogdeps/Perlbal/ClientManage.pm +++ /dev/null @@ -1,150 +0,0 @@ -###################################################################### -# Management connection from a client -###################################################################### - -package Perlbal::ClientManage; -use strict; -use warnings; -no warnings qw(deprecated); - -use base "Perlbal::Socket"; -use fields ('service', - 'buf', - 'is_http', # bool: is an HTTP request? - 'ctx', # command context - ); - -# ClientManage -sub new { - my Perlbal::ClientManage $self = shift; - my ($service, $sock) = @_; - $self = fields::new($self) unless ref $self; - $self->SUPER::new($sock); - $self->{service} = $service; - $self->{buf} = ""; # what we've read so far, not forming a complete line - - $self->{ctx} = Perlbal::CommandContext->new; - $self->{ctx}->verbose(1); - - $self->watch_read(1); - return $self; -} - -# ClientManage -sub event_read { - my Perlbal::ClientManage $self = shift; - - my $bref; - unless ($self->{is_http}) { - $bref = $self->read(1024); - return $self->close() unless defined $bref; - $self->{buf} .= $$bref; - - if ($self->{buf} =~ /^(?:HEAD|GET|POST) /) { - $self->{is_http} = 1; - $self->{headers_string} .= $$bref; - } - } - - if ($self->{is_http}) { - my $hd = $self->read_request_headers; - return unless $hd; - $self->handle_http(); - return; - } - - while ($self->{buf} =~ s/^(.+?)\r?\n//) { - my $line = $1; - - if ($line =~ /^quit|exit$/) { - $self->close('user_requested_quit'); - return; - } - - my $out = sub { - $self->write("$_[0]\r\n"); - }; - - Perlbal::run_manage_command($line, $out, $self->{ctx}); - } -} - -sub event_write { - my $self = shift; - $self->watch_write(0) if $self->write(undef); -} - -# 
ClientManage -sub event_err { my $self = shift; $self->close; } -sub event_hup { my $self = shift; $self->close; } - -# HTTP management support -sub handle_http { - my Perlbal::ClientManage $self = shift; - - my $uri = $self->{req_headers}->request_uri; - - my $body; - my $code = "200 OK"; - - my $prebox = sub { - my $cmd = shift; - my $alt = shift; - $body .= "
$cmd
"; - Perlbal::run_manage_command($cmd, sub { - my $line = $_[0] || ""; - $alt->(\$line) if $alt; - $body .= "$line\n"; - }); - $body .= "
\n"; - - }; - - $body .= "\n"; - $body .= "\n"; - $body .= "perlbal management interface"; - - if ($uri eq "/") { - $body .= "

perlbal management interface

    "; - $body .= "
  • Sockets
  • "; - $body .= "
  • Perl Objects in use
  • "; - $body .= "
  • Service Details
      "; - foreach my $sname (Perlbal->service_names) { - my Perlbal::Service $svc = Perlbal->service($sname); - next unless $svc; - my $listen = $svc->{listen} ? " ($svc->{listen})" : ""; - $body .= "
    • $sname - $svc->{role}$listen
    • \n"; - } - $body .= "
  • "; - $body .= "
"; - } elsif ($uri eq "/socks") { - $prebox->('socks summary'); - - $prebox->('socks', sub { - ${$_[0]} =~ s!service \'(\w+)\'!$1!; - }); - } elsif ($uri eq "/obj") { - $prebox->('obj'); - } elsif ($uri =~ m!^/service\?(\w+)$!) { - my $service = $1; - $prebox->("show service $service"); - } else { - $code = "404 Not found"; - $body .= "

$code

"; - } - - $body .= "

Perlbal management.

\n"; - $self->write("HTTP/1.0 $code\r\nContent-type: text/html\r\nContent-Length: " . length($body) . - "\r\n\r\n$body"); - $self->write(sub { $self->close; }); - return; -} - -1; - - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/ClientProxy.pm b/lib/mogdeps/Perlbal/ClientProxy.pm deleted file mode 100644 index 37cec088..00000000 --- a/lib/mogdeps/Perlbal/ClientProxy.pm +++ /dev/null @@ -1,1280 +0,0 @@ -###################################################################### -# HTTP Connection from a reverse proxy client -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. -# -package Perlbal::ClientProxy; -use strict; -use warnings; -use base "Perlbal::ClientHTTPBase"; -no warnings qw(deprecated); - -use Perlbal::ChunkedUploadState; -use Perlbal::Util; - -use fields ( - 'backend', # Perlbal::BackendHTTP object (or undef if disconnected) - 'backend_requested', # true if we've requested a backend for this request - 'reconnect_count', # number of times we've tried to reconnect to backend - 'high_priority', # boolean; 1 if we are or were in the high priority queue - 'low_priority', # boolean; 1 if we are or were in the low priority queue - 'reproxy_uris', # arrayref; URIs to reproxy to, in order - 'reproxy_expected_size', # int: size of response we expect to get back for reproxy - 'currently_reproxying', # arrayref; the host info and URI we're reproxying right now - 'content_length_remain', # int: amount of data we're still waiting for - 'responded', # bool: whether we've already sent a response to the user or not - 'last_request_time', # int: time that we last received a request - 'primary_res_hdrs', # if defined, we are doing a transparent reproxy-URI - # and the headers we get back aren't necessarily - # the ones we want. instead, get most headers - # from the provided res headers object here. 
- 'is_buffering', # bool; if we're buffering some/all of a request to memory/disk - 'is_writing', # bool; if on, we currently have an aio_write out - 'start_time', # hi-res time when we started getting data to upload - 'bufh', # buffered upload filehandle object - 'bufilename', # string; buffered upload filename - 'bureason', # string; if defined, the reason we're buffering to disk - 'buoutpos', # int; buffered output position - 'backend_stalled', # boolean: if backend has shut off its reads because we're too slow. - 'unread_data_waiting', # boolean: if we shut off reads while we know data is yet to be read from client - 'chunked_upload_state', # bool/obj: if processing a chunked upload, Perlbal::ChunkedUploadState object, else undef - 'request_body_length', # integer: request's body length, either as-declared, - # or calculated after chunked upload is complete - - # for perlbal sending out UDP packets related to upload status (for xmlhttprequest upload bar) - 'last_upload_packet', # unixtime we last sent a UDP upload packet - 'upload_session', # client's self-generated upload session - - # error-retrying stuff - 'retry_count', # number of times we've retried this request so far after getting 500 errors - ); - -use constant READ_SIZE => 131072; # 128k, ~common TCP window size? -use constant READ_AHEAD_SIZE => 32768; # kinda arbitrary. sum of these two is max stored per connection while waiting for backend. 
-use Errno qw( EPIPE ENOENT ECONNRESET EAGAIN ); -use POSIX qw( O_CREAT O_TRUNC O_RDWR O_RDONLY ); -use Time::HiRes qw( gettimeofday tv_interval ); - -my $udp_sock; - -# ClientProxy -sub new { - my Perlbal::ClientProxy $self = shift; - my ($service, $sock) = @_; - $self = fields::new($self) unless ref $self; - $self->SUPER::new($service, $sock ); - - Perlbal::objctor($self); - - $self->init; - $self->watch_read(1); - return $self; -} - -sub new_from_base { - my $class = shift; - my Perlbal::ClientHTTPBase $cb = shift; - Perlbal::Util::rebless($cb, $class); - $cb->init; - $cb->watch_read(1); - $cb->handle_request; - return $cb; -} - -sub init { - my Perlbal::ClientProxy $self = $_[0]; - - $self->{last_request_time} = 0; - - $self->{backend} = undef; - $self->{high_priority} = 0; - $self->{low_priority} = 0; - - $self->{responded} = 0; - $self->{unread_data_waiting} = 0; - $self->{content_length_remain} = undef; - $self->{backend_requested} = 0; - - $self->{is_buffering} = 0; - $self->{is_writing} = 0; - $self->{start_time} = undef; - $self->{bufh} = undef; - $self->{bufilename} = undef; - $self->{buoutpos} = 0; - $self->{bureason} = undef; - $self->{chunked_upload_state} = undef; - $self->{request_body_length} = undef; - - $self->{reproxy_uris} = undef; - $self->{reproxy_expected_size} = undef; - $self->{currently_reproxying} = undef; - - $self->{retry_count} = 0; -} - -# given a service name, re-request (GET/HEAD only) to that service, even though -# you've already done a request to your original service -sub start_reproxy_service { - my Perlbal::ClientProxy $self = $_[0]; - my Perlbal::HTTPHeaders $primary_res_hdrs = $_[1]; - my $svc_name = $_[2]; - - my $svc = $svc_name ? 
Perlbal->service($svc_name) : undef; - unless ($svc) { - $self->_simple_response(404, "Vhost twiddling not configured for requested pair."); - return 1; - } - - $self->{backend_requested} = 0; - $self->{backend} = undef; - $self->{res_headers} = $primary_res_hdrs; - - $svc->adopt_base_client($self); -} - -# call this with a string of space separated URIs to start a process -# that will fetch the item at the first and return it to the user, -# on failure it will try the second, then third, etc -sub start_reproxy_uri { - my Perlbal::ClientProxy $self = $_[0]; - my Perlbal::HTTPHeaders $primary_res_hdrs = $_[1]; - my $urls = $_[2]; - - # at this point we need to disconnect from our backend - $self->{backend} = undef; - - # failure if we have no primary response headers - return unless $self->{primary_res_hdrs} ||= $primary_res_hdrs; - - # construct reproxy_uri list - if (defined $urls) { - my @uris = split /\s+/, $urls; - $self->{currently_reproxying} = undef; - $self->{reproxy_uris} = []; - foreach my $uri (@uris) { - next unless $uri =~ m!^http://(.+?)(?::(\d+))?(/.*)?$!; - push @{$self->{reproxy_uris}}, [ $1, $2 || 80, $3 || '/' ]; - } - } - - # if we get in here and we have currently_reproxying defined, then something - # happened and we want to retry that one - if ($self->{currently_reproxying}) { - unshift @{$self->{reproxy_uris}}, $self->{currently_reproxying}; - $self->{currently_reproxying} = undef; - } - - # if we have no uris in our list now, tell the user 404 - return $self->_simple_response(503) - unless @{$self->{reproxy_uris} || []}; - - # set the expected size if we got a content length in our headers - if ($primary_res_hdrs && (my $expected_size = $primary_res_hdrs->header('X-REPROXY-EXPECTED-SIZE'))) { - $self->{reproxy_expected_size} = $expected_size; - } - - # pass ourselves off to the reproxy manager - $self->state('wait_backend'); - Perlbal::ReproxyManager::do_reproxy($self); -} - -# called by the reproxy manager when we can't get to our 
requested backend -sub try_next_uri { - my Perlbal::ClientProxy $self = $_[0]; - - shift @{$self->{reproxy_uris}}; - $self->{currently_reproxying} = undef; - $self->start_reproxy_uri(); -} - -# returns true if this ClientProxy is too many bytes behind the backend -sub too_far_behind_backend { - my Perlbal::ClientProxy $self = $_[0]; - my Perlbal::BackendHTTP $backend = $self->{backend} or return 0; - - # if a backend doesn't have a service, it's a - # ReproxyManager-created backend, and thus it should use the - # 'buffer_size_reproxy_url' parameter for acceptable buffer - # widths, and not the regular 'buffer_size'. this lets people - # tune buffers depending on the types of webservers. (assumption - # being that reproxied-to webservers are event-based and it's okay - # to tie the up longer in favor of using less buffer memory in - # perlbal) - my $max_buffer = defined $backend->{service} ? - $self->{service}->{buffer_size} : - $self->{service}->{buffer_size_reproxy_url}; - - return $self->{write_buf_size} > $max_buffer; -} - -# this is a callback for when a backend has been created and is -# ready for us to do something with it -sub use_reproxy_backend { - my Perlbal::ClientProxy $self = $_[0]; - my Perlbal::BackendHTTP $be = $_[1]; - - # get a URI - my $datref = $self->{currently_reproxying} = shift @{$self->{reproxy_uris}}; - unless (defined $datref) { - # return error and close the backend - $be->close('invalid_uris'); - return $self->_simple_response(503); - } - - # now send request - $self->{backend} = $be; - $be->{client} = $self; - - my $extra_hdr = ""; - if (my $range = $self->{req_headers}->header("Range")) { - $extra_hdr .= "Range: $range\r\n"; - } - if (my $host = $self->{req_headers}->header("Host")) { - $extra_hdr .= "Host: $host\r\n"; - } - - my $req_method = $self->{req_headers}->request_method eq 'HEAD' ? 
'HEAD' : 'GET'; - my $headers = "$req_method $datref->[2] HTTP/1.0\r\nConnection: keep-alive\r\n${extra_hdr}\r\n"; - - $be->{req_headers} = Perlbal::HTTPHeaders->new(\$headers); - $be->state('sending_req'); - $self->state('backend_req_sent'); - $be->write($be->{req_headers}->to_string_ref); - $be->watch_read(1); - $be->watch_write(1); -} - -# this is called when a transient backend getting a reproxied URI has received -# a response from the server and is ready for us to deal with it -sub backend_response_received { - my Perlbal::ClientProxy $self = $_[0]; - my Perlbal::BackendHTTP $be = $_[1]; - - # a response means that we are no longer currently waiting on a reproxy, and - # don't want to retry this URI - $self->{currently_reproxying} = undef; - - # we fail if we got something that's NOT a 2xx code, OR, if we expected - # a certain size and got back something different - my $code = $be->{res_headers}->response_code + 0; - - my $bad_code = sub { - return 0 if $code >= 200 && $code <= 299; - return 0 if $code == 416; - return 1; - }; - - my $bad_size = sub { - return 0 unless defined $self->{reproxy_expected_size}; - return $self->{reproxy_expected_size} != $be->{res_headers}->header('Content-length'); - }; - - if ($bad_code->() || $bad_size->()) { - # fall back to an alternate URL - $be->{client} = undef; - $be->close('non_200_reproxy'); - $self->try_next_uri; - return 1; - } - return 0; -} - -sub start_reproxy_file { - my Perlbal::ClientProxy $self = shift; - my $file = shift; # filename to reproxy - my Perlbal::HTTPHeaders $hd = shift; # headers from backend, in need of cleanup - - # at this point we need to disconnect from our backend - $self->{backend} = undef; - - # call hook for pre-reproxy - return if $self->{service}->run_hook("start_file_reproxy", $self, \$file); - - # set our expected size - if (my $expected_size = $hd->header('X-REPROXY-EXPECTED-SIZE')) { - $self->{reproxy_expected_size} = $expected_size; - } - - # start an async stat on the file - 
$self->state('wait_stat'); - Perlbal::AIO::aio_stat($file, sub { - - # if the client's since disconnected by the time we get the stat, - # just bail. - return if $self->{closed}; - - my $size = -s _; - - unless ($size) { - # FIXME: POLICY: 404 or retry request to backend w/o reproxy-file capability? - return $self->_simple_response(404); - } - if (defined $self->{reproxy_expected_size} && $self->{reproxy_expected_size} != $size) { - # 404; the file size doesn't match what we expected - return $self->_simple_response(404); - } - - # if the thing we're reproxying is indeed a file, advertise that - # we support byte ranges on it - if (-f _) { - $hd->header("Accept-Ranges", "bytes"); - } - - my ($status, $range_start, $range_end) = $self->{req_headers}->range($size); - my $not_satisfiable = 0; - - if ($status == 416) { - $hd = Perlbal::HTTPHeaders->new_response(416); - $hd->header("Content-Range", $size ? "bytes */$size" : "*"); - $not_satisfiable = 1; - } - - # change the status code to 200 if the backend gave us 204 No Content - $hd->code(200) if $hd->response_code == 204; - - # fixup the Content-Length header with the correct size (application - # doesn't need to provide a correct value if it doesn't want to stat()) - if ($status == 200) { - $hd->header("Content-Length", $size); - } elsif ($status == 206) { - $hd->header("Content-Range", "bytes $range_start-$range_end/$size"); - $hd->header("Content-Length", $range_end - $range_start + 1); - $hd->code(206); - } - - # don't send this internal header to the client: - $hd->header('X-REPROXY-FILE', undef); - - # rewrite some other parts of the header - $self->setup_keepalive($hd); - - # just send the header, now that we cleaned it. 
- $self->{res_headers} = $hd; - $self->write($hd->to_string_ref); - - if ($self->{req_headers}->request_method eq 'HEAD' || $not_satisfiable) { - $self->write(sub { $self->http_response_sent; }); - return; - } - - $self->state('wait_open'); - Perlbal::AIO::aio_open($file, 0, 0 , sub { - my $fh = shift; - - # if client's gone, just close filehandle and abort - if ($self->{closed}) { - CORE::close($fh) if $fh; - return; - } - - # handle errors - if (! $fh) { - # FIXME: do 500 vs. 404 vs whatever based on $! ? - return $self->_simple_response(500); - } - - # seek if partial content - if ($status == 206) { - sysseek($fh, $range_start, &POSIX::SEEK_SET); - $size = $range_end - $range_start + 1; - } - - $self->reproxy_fh($fh, $size); - $self->watch_write(1); - }); - }); -} - -# Client -# get/set backend proxy connection -sub backend { - my Perlbal::ClientProxy $self = shift; - return $self->{backend} unless @_; - - my $backend = shift; - $self->state('draining_res') unless $backend; - return $self->{backend} = $backend; -} - -# invoked by backend when it wants us to start watching for reads again -# and feeding it data (if we have any) -sub backend_ready { - my Perlbal::ClientProxy $self = $_[0]; - my Perlbal::BackendHTTP $be = $_[1]; - - # if we'd turned ourselves off while we waited for a backend, turn - # ourselves back on, because the backend is ready for data now. - if ($self->{unread_data_waiting}) { - $self->watch_read(1); - } - - # normal, not-buffered-to-disk case: - return $self->drain_read_buf_to($be) unless $self->{bureason}; - - # buffered-to-disk case. - - # tell the backend it has to go into buffered_upload_mode, - # which makes it inform us of its writable availability - $be->invoke_buffered_upload_mode; -} - -# our backend enqueues a call to this method in our write buffer, so this is called -# right after we've finished sending all of the results to the user. at this point, -# if we were doing keep-alive, we don't close and setup for the next request. 
-sub backend_finished { - my Perlbal::ClientProxy $self = shift; - print "ClientProxy::backend_finished\n" if Perlbal::DEBUG >= 3; - - # mark ourselves as having responded (presumably if we're here, - # the backend has responded already) - $self->{responded} = 1; - - # our backend is done with us, so we disconnect ourselves from it - $self->{backend} = undef; - - # backend is done sending data to us, so we can recycle this clientproxy - # if we don't have any data yet to read - return $self->http_response_sent unless $self->{unread_data_waiting}; - - # if we get here (and we do, rarely, in practice) then that means - # the backend read was empty/disconnected (or otherwise messed up), - # and the only thing we can really do is close the client down. - $self->close("backend_finished_while_unread_data"); -} - -# Called when this client is entering a persist_wait state, but before we are returned to base. -sub persist_wait { - my Perlbal::ClientProxy $self = $_[0]; - # We're in keepalive, and just completed a proxy request - $self->{service}->run_hooks('end_proxy_request', $self); -} - -# called when we've sent a response to a user fully and we need to reset state -sub http_response_sent { - my Perlbal::ClientProxy $self = $_[0]; - - # persistence logic is in ClientHTTPBase - return 0 unless $self->SUPER::http_response_sent; - - print "ClientProxy::http_response_sent -- resetting state\n" if Perlbal::DEBUG >= 3; - - if (my $be = $self->{backend}) { - $self->{backend} = undef; - $be->forget_client; - } - - # if we get here we're being persistent, reset our state - $self->{backend_requested} = 0; - $self->{high_priority} = 0; - $self->{reproxy_uris} = undef; - $self->{reproxy_expected_size} = undef; - $self->{currently_reproxying} = undef; - $self->{content_length_remain} = undef; - $self->{primary_res_hdrs} = undef; - $self->{responded} = 0; - $self->{is_buffering} = 0; - $self->{is_writing} = 0; - $self->{start_time} = undef; - $self->{bufh} = undef; - 
$self->{bufilename} = undef; - $self->{buoutpos} = 0; - $self->{bureason} = undef; - $self->{upload_session} = undef; - $self->{chunked_upload_state} = undef; - $self->{request_body_length} = undef; - return 1; -} - -# to request a backend connection AFTER you've already done so, if you -# didn't like the results from the first one. (like after a 500 error) -sub rerequest_backend { - my Perlbal::ClientProxy $self = shift; - - $self->{backend_requested} = 0; - $self->{backend} = undef; - $self->request_backend; -} - -sub request_backend { - my Perlbal::ClientProxy $self = shift; - return if $self->{backend_requested}; - $self->{backend_requested} = 1; - - $self->state('wait_backend'); - $self->{service}->request_backend_connection($self); - $self->tcp_cork(1); # cork writes to self -} - -# Client (overrides and calls super) -sub close { - my Perlbal::ClientProxy $self = shift; - my $reason = shift; - - warn sprintf( - "Perlbal::ClientProxy closed %s%s.\n", - ( $self->{closed} ? "again " : "" ), - (defined $reason ? "saying '$reason'" : "for an unknown reason") - ) if Perlbal::DEBUG >= 2; - - # don't close twice - return if $self->{closed}; - - # signal that we're done - $self->{service}->run_hooks('end_proxy_request', $self); - - # kill our backend if we still have one - if (my $backend = $self->{backend}) { - print "Client ($self) closing backend ($backend)\n" if Perlbal::DEBUG >= 1; - $self->backend(undef); - $backend->close($reason ? 
"proxied_from_client_close:$reason" : "proxied_from_client_close"); - } else { - # if no backend, tell our service that we don't care for one anymore - $self->{service}->note_client_close($self); - } - - # call ClientHTTPBase's close - $self->SUPER::close($reason); -} - -sub client_disconnected { # : void - my Perlbal::ClientProxy $self = shift; - print "ClientProxy::client_disconnected\n" if Perlbal::DEBUG >= 2; - - # if client disconnected, then we need to turn off watching for - # further reads and purge the existing upload if any. also, we - # should just return and do nothing else. - - $self->watch_read(0); - $self->purge_buffered_upload if $self->{bureason}; - return $self->close('user_disconnected'); -} - -# Client -sub event_write { - my Perlbal::ClientProxy $self = shift; - print "ClientProxy::event_write\n" if Perlbal::DEBUG >= 3; - - # obviously if we're writing the backend has processed our request - # and we are responding/have responded to the user, so mark it so - $self->{responded} = 1; - - # will eventually, finally reset the whole object on completion - $self->SUPER::event_write; - - # trigger our backend to keep reading, if it's still connected - if ($self->{backend_stalled} && (my $backend = $self->{backend})) { - print " unstalling backend\n" if Perlbal::DEBUG >= 3; - - $self->{backend_stalled} = 0; - $backend->watch_read(1); - } -} - -# ClientProxy -sub event_read { - my Perlbal::ClientProxy $self = shift; - print "ClientProxy::event_read\n" if Perlbal::DEBUG >= 3; - - # mark alive so we don't get killed for being idle - $self->{alive_time} = time; - - # if we have no headers, the only thing we can do is try to get some - if (! $self->{req_headers}) { - print " no headers. 
reading.\n" if Perlbal::DEBUG >= 3; - $self->handle_request if $self->read_request_headers; - return; - } - - # if we're buffering to disk or haven't read too much from this client, keep reading, - # otherwise shut off read notifications - unless ($self->{is_buffering} || $self->{read_ahead} < READ_AHEAD_SIZE) { - # our buffer is full, so turn off reads for now - print " disabling reads.\n" if Perlbal::DEBUG >= 3; - $self->watch_read(0); - return; - } - - # deal with chunked uploads - if (my $cus = $self->{chunked_upload_state}) { - $cus->on_readable($self); - - # if we got more than 1MB not flushed to disk, - # stop reading for a bit until disk catches up - if ($self->{read_ahead} > 1024*1024) { - $self->watch_read(0); - } - return; - } - - # read more data if we're still buffering or if our current read buffer - # is not full to the max READ_AHEAD_SIZE which is how much data we will - # buffer in from the user before passing on to the backend - - # read the MIN(READ_SIZE, content_length_remain) - my $read_size = READ_SIZE; - my $remain = $self->{content_length_remain}; - - $read_size = $remain if $remain && $remain < $read_size; - print " reading $read_size bytes (", (defined $remain ? $remain : "(undef)"), " bytes remain)\n" if Perlbal::DEBUG >= 3; - - my $bref = $self->read($read_size); - - # if the read returned undef, that means the connection was closed - # (see: Danga::Socket::read) - return $self->client_disconnected unless defined $bref; - - # if they didn't declare a content body length and we just got a - # readable event that's not a disconnect, something's messed up. - # they're overflowing us. disconnect! - if (! $remain) { - $self->_simple_response(400, "Can't pipeline to HTTP/1.0"); - $self->close("pipelining_to_http10"); - return; - } - - # now that we know we have a defined value, determine how long it is, and do - # housekeeping to keep our tracking numbers up to date. 
- my $len = length($$bref); - print " read $len bytes\n" if Perlbal::DEBUG >= 3; - - # when run under the program "trickle", epoll speaks the truth to - # us, but then trickle interferes and steals our reads/writes, so - # this fails. normally this check isn't needed. - return unless $len; - - $self->{read_size} += $len; - $self->{content_length_remain} -= $len if $remain; - - my $done_reading = defined $self->{content_length_remain} && $self->{content_length_remain} <= 0; - my $backend = $self->backend; - print(" done_reading = $done_reading, backend = ", ($backend || ""), "\n") if Perlbal::DEBUG >= 3; - - # upload tracking - if (my $session = $self->{upload_session}) { - my $cl = $self->{req_headers}->content_length; - my $remain = $self->{content_length_remain}; - my $now = time(); # FIXME: more efficient? - if ($cl && $remain && ($self->{last_upload_packet} || 0) != $now) { - my $done = $cl - $remain; - $self->{last_upload_packet} = $now; - $udp_sock ||= IO::Socket::INET->new(Proto => 'udp'); - my $since = $self->{last_request_time}; - my $send = "UPLOAD:$session:$done:$cl:$since:$now"; - if ($udp_sock) { - foreach my $ep (@{ $self->{service}{upload_status_listeners_sockaddr} }) { - my $rv = $udp_sock->send($send, 0, $ep); - } - } - } - } - - # just dump the read into the nether if we're dangling. 
that is - # the case when we send the headers to the backend and it responds - # before we're done reading from the client; therefore further - # reads from the client just need to be sent nowhere, because the - # RFC2616 section 8.2.3 says: "the server SHOULD NOT close the - # transport connection until it has read the entire request" - if ($self->{responded}) { - print " already responded.\n" if Perlbal::DEBUG >= 3; - # in addition, if we're now out of data (clr == 0), then we should - # either close ourselves or get ready for another request - return $self->http_response_sent if $done_reading; - - print " already responded [2].\n" if Perlbal::DEBUG >= 3; - # at this point, if the backend has responded then we just return - # as we don't want to send it on to them or buffer it up, which is - # what the code below does - return; - } - - # if we have no data left to read, stop reading. all that can - # come later is an extra \r\n which we handle later when parsing - # new request headers. and if it's something else, we'll bail on - # the next request, not this one. - if ($done_reading) { - Carp::confess("content_length_remain less than zero: self->{content_length_remain}") - if $self->{content_length_remain} < 0; - $self->{unread_data_waiting} = 0; - $self->watch_read(0); - } - - # now, if we have a backend, then we should be writing it to the backend - # and not doing anything else - if ($backend) { - print " got a backend. sending write to it.\n" if Perlbal::DEBUG >= 3; - $backend->write($bref); - # TODO: monitor the backend's write buffer depth? - return; - } - - # now, we know we don't have a backend, so we have to push this data onto our - # read buffer... it's not going anywhere yet - push @{$self->{read_buf}}, $bref; - $self->{read_ahead} += $len; - print " no backend. read_ahead = $self->{read_ahead}.\n" if Perlbal::DEBUG >= 3; - - # if we know we've already started spooling a file to disk, then continue - # to do that. 
- print " bureason = $self->{bureason}\n" if Perlbal::DEBUG >= 3 && $self->{bureason}; - return $self->buffered_upload_update if $self->{bureason}; - - # if we are under our buffer-to-memory size, just continue buffering here and - # don't fall through to the backend request call below - return if - ! $done_reading && - $self->{read_ahead} < $self->{service}->{buffer_backend_connect}; - - # over the buffer-to-memory size, see if we should start spooling to disk. - return if $self->{service}->{buffer_uploads} && $self->decide_to_buffer_to_disk; - - # give plugins a chance to act on the request before we request a backend - # (added by Chris Hondl , March 2006) - my $svc = $self->{service}; - return if $svc->run_hook('proxy_read_request', $self); - - # if we fall through to here, we need to ensure that a backend is on the - # way, because no specialized handling took over above - print " finally requesting a backend\n" if Perlbal::DEBUG >= 3; - return $self->request_backend; -} - -sub handle_request { - my Perlbal::ClientProxy $self = shift; - my $req_hd = $self->{req_headers}; - - unless ($req_hd) { - $self->close("handle_request without headers"); - return; - } - - $self->check_req_headers; - - my $svc = $self->{service}; - # give plugins a chance to force us to bail - return if $svc->run_hook('start_proxy_request', $self); - return if $svc->run_hook('start_http_request', $self); - - if ($self->handle_chunked_upload) { - # handled in method. - } else { - # if defined we're waiting on some amount of data. also, we have to - # subtract out read_size, which is the amount of data that was - # extra in the packet with the header that's part of the body. 
- my $length = $self->{request_body_length} = - $self->{content_length_remain} = - $req_hd->content_length; - - if (defined $length && $length < 0) { - $self->_simple_response(400, "Invalid request: Content-Length < 0"); - $self->close("negative_content_length"); - return; - } - - $self->{unread_data_waiting} = 1 if $self->{content_length_remain}; - } - - # upload-tracking stuff. both starting a new upload track session, - # and checking on status of ongoing one - return if $svc->{upload_status_listeners} && $self->handle_upload_tracking; - - # note that we've gotten a request - $self->{requests}++; - $self->{last_request_time} = $self->{alive_time}; - - # either start buffering some of the request to memory, or - # immediately request a backend connection. - if ($self->{chunked_upload_state}) { - $self->{request_body_length} = 0; - $self->{is_buffering} = 1; - $self->{bureason} = 'chunked'; - $self->buffered_upload_update; - } elsif ($self->{content_length_remain} && $self->{service}->{buffer_backend_connect}) { - # the deeper path - $self->start_buffering_request; - } else { - # get the backend request process moving, since we aren't buffering - $self->{is_buffering} = 0; - - # if reproxy-caching is enabled, we can often bypass needing to allocate a BackendHTTP connection: - return if $svc->{reproxy_cache} && $self->satisfy_request_from_cache; - - $self->request_backend; - } -} - -sub handle_chunked_upload { - my Perlbal::ClientProxy $self = shift; - my $req_hd = $self->{req_headers}; - my $te = $req_hd->header("Transfer-Encoding"); - return unless $te && $te eq "chunked"; - return unless $self->{service}->{buffer_uploads}; - - $req_hd->header("Transfer-Encoding", undef); # remove it (won't go to backend) - - my $eh = $req_hd->header("Expect"); - if ($eh && $eh =~ /\b100-continue\b/) { - $self->write(\ "HTTP/1.1 100 Continue\r\n\r\n"); - $req_hd->header("Expect", undef); # remove it (won't go to backend) - } - - my $max_size = 
$self->{service}{max_chunked_request_size}; - - my $args = { - on_new_chunk => sub { - my $cref = shift; - my $len = length($$cref); - push @{$self->{read_buf}}, $cref; - $self->{read_ahead} += $len; - $self->{request_body_length} += $len; - - # if too large, disconnect them... - if ($max_size && $self->{request_body_length} > $max_size) { - $self->purge_buffered_upload; - $self->close; - return; - } - $self->buffered_upload_update; - }, - on_disconnect => sub { - $self->client_disconnected; - }, - on_zero_chunk => sub { - $self->send_buffered_upload; - }, - }; - - $self->{chunked_upload_state} = Perlbal::ChunkedUploadState->new(%$args); - return 1; -} - -sub satisfy_request_from_cache { - my Perlbal::ClientProxy $self = shift; - - my $req_hd = $self->{req_headers}; - my $svc = $self->{service}; - my $cache = $svc->{reproxy_cache}; - $svc->{_stat_requests}++; - - my $requri = $req_hd->request_uri || ''; - my $hostname = $req_hd->header("Host") || ''; - - my $key = "$hostname|$requri"; - - my $reproxy = $cache->get($key) or - return 0; - - my ($timeout, $headers, $urls) = @$reproxy; - return 0 if time() > $timeout; - - $svc->{_stat_cache_hits}++; - my %headers = map { ref $_ eq 'SCALAR' ? 
$$_ : $_ } @{$headers || []}; - - if (my $ims = $req_hd->header("If-Modified-Since")) { - my ($lm_key) = grep { uc($_) eq "LAST-MODIFIED" } keys %headers; - my $lm = $headers{$lm_key} || ""; - - # remove the IE length suffix - $ims =~ s/; length=(\d+)//; - - # If 'Last-Modified' is same as 'If-Modified-Since', send a 304 - if ($ims eq $lm) { - my $res_hd = Perlbal::HTTPHeaders->new_response(304); - $res_hd->header("Content-Length", "0"); - $self->tcp_cork(1); - $self->state('xfer_resp'); - $self->write($res_hd->to_string_ref); - $self->write(sub { $self->http_response_sent; }); - return 1; - } - } - - my $res_hd = Perlbal::HTTPHeaders->new_response(200); - $res_hd->header("Date", HTTP::Date::time2str(time())); - while (my ($key, $value) = each %headers) { - $res_hd->header($key, $value); - } - - $self->start_reproxy_uri($res_hd, $urls); - return 1; -} - -# return 1 to steal this connection (when they're asking status of an -# upload session), return 0 to return it to handle_request's control. -sub handle_upload_tracking { - my Perlbal::ClientProxy $self = shift; - my $req_hd = $self->{req_headers}; - - return 0 unless - $req_hd->request_uri =~ /[\?&]client_up_sess=(\w{5,50})\b/; - - my $sess = $1; - - # getting status? - if ($req_hd->request_uri =~ m!^/__upload_status\?!) { - my $status = Perlbal::UploadListener::get_status($sess); - my $now = time(); - my $body = $status ? 
- "{done:$status->{done},total:$status->{total},starttime:$status->{starttime},nowtime:$now}" : - "{}"; - - my $res = $self->{res_headers} = Perlbal::HTTPHeaders->new_response(200); - $res->header("Content-Type", "text/plain"); - $res->header('Content-Length', length $body); - $self->setup_keepalive($res); - $self->tcp_cork(1); # cork writes to self - $self->write($res->to_string_ref); - $self->write(\ $body); - $self->write(sub { $self->http_response_sent; }); - return 1; - } - - # otherwise just tagging this upload as a new upload session - $self->{upload_session} = $sess; - return 0; -} - -# continuation of handle_request, in the case where we need to start buffering -# a bit of the request body to memory, either hoping that's all of it, or to -# make a determination of whether or not we should save it all to disk first -sub start_buffering_request { - my Perlbal::ClientProxy $self = shift; - - # buffering case: - $self->{is_buffering} = 1; - - # shortcut: if we know that we're buffering by size, and the size - # of this upload is bigger than that value, we can just turn on spool - # to disk right now... - if ($self->{service}->{buffer_uploads} && $self->{service}->{buffer_upload_threshold_size}) { - my $req_hd = $self->{req_headers}; - if ($req_hd->content_length >= $self->{service}->{buffer_upload_threshold_size}) { - $self->{bureason} = 'size'; - if ($ENV{PERLBAL_DEBUG_BUFFERED_UPLOADS}) { - $self->{req_headers}->header('X-PERLBAL-BUFFERED-UPLOAD-REASON', 'size'); - } - $self->state('buffering_upload'); - $self->buffered_upload_update; - return; - } - } - - # well, we're buffering, but we're not going to disk just yet (but still might) - $self->state('buffering_request'); - - # only need time if we are using the buffer to disk functionality - $self->{start_time} = [ gettimeofday() ] - if $self->{service}->{buffer_uploads}; -} - -# looks at our states and decides if we should start writing to disk -# or should just go ahead and blast this to the backend. 
returns 1 -# if the decision was made to buffer to disk -sub decide_to_buffer_to_disk { - my Perlbal::ClientProxy $self = shift; - return unless $self->{is_buffering}; - return $self->{bureason} if defined $self->{bureason}; - - # this is called when we have enough data to determine whether or not to - # start buffering to disk - my $dur = tv_interval($self->{start_time}) || 1; - my $rate = $self->{read_ahead} / $dur; - my $etime = $self->{content_length_remain} / $rate; - - # see if we have enough data to make the determination - my $reason = undef; - - # see if we blow the rate away - if ($self->{service}->{buffer_upload_threshold_rate} > 0 && - $rate < $self->{service}->{buffer_upload_threshold_rate}) { - # they are slower than the minimum rate - $reason = 'rate'; - } - - # and finally check estimated time exceeding - if ($self->{service}->{buffer_upload_threshold_time} > 0 && - $etime > $self->{service}->{buffer_upload_threshold_time}) { - # exceeds - $reason = 'time'; - } - - unless ($reason) { - $self->{is_buffering} = 0; - return 0; - } - - # start saving it to disk - $self->state('buffering_upload'); - $self->buffered_upload_update; - $self->{bureason} = $reason; - - if ($ENV{PERLBAL_DEBUG_BUFFERED_UPLOADS}) { - $self->{req_headers}->header('X-PERLBAL-BUFFERED-UPLOAD-REASON', $reason); - } - - return 1; -} - -# take ourselves and send along our buffered data to the backend -sub send_buffered_upload { - my Perlbal::ClientProxy $self = shift; - - # make sure our buoutpos is the same as the content length... - return if $self->{is_writing}; - - # set the content-length that goes to the backend... 
- if ($self->{chunked_upload_state}) { - $self->{req_headers}->header("Content-Length", $self->{request_body_length}); - } - - my $clen = $self->{req_headers}->content_length; - if ($clen != $self->{buoutpos}) { - Perlbal::log('crit', "Content length of $clen declared but $self->{buoutpos} bytes written to disk"); - return $self->_simple_response(500); - } - - # reset our position so we start reading from the right spot - $self->{buoutpos} = 0; - sysseek($self->{bufh}, 0, 0) if ($self->{bufh}); # But only if it exists at all - - # notify that we want the backend so we get the ball rolling - $self->request_backend; -} - -sub continue_buffered_upload { - my Perlbal::ClientProxy $self = shift; - my Perlbal::BackendHTTP $be = shift; - return unless $self && $be; - - # now send the data - my $clen = $self->{request_body_length}; - - if ($self->{buoutpos} < $clen) { - my $sent = Perlbal::Socket::sendfile($be->{fd}, fileno($self->{bufh}), $clen - $self->{buoutpos}); - if ($sent < 0) { - return $self->close("epipe") if $! == EPIPE; - return $self->close("connreset") if $! == ECONNRESET; - print STDERR "Error w/ sendfile: $!\n"; - return $self->close('sendfile_error'); - } - $self->{buoutpos} += $sent; - } - - # if we're done, purge the file and move on - if ($self->{buoutpos} >= $clen) { - $be->{buffered_upload_mode} = 0; - $self->purge_buffered_upload; - return; - } - - # we will be called again by the backend since buffered_upload_mode is on -} - -# write data to disk -sub buffered_upload_update { - my Perlbal::ClientProxy $self = shift; - return if $self->{is_writing}; - return unless $self->{is_buffering} && $self->{read_ahead}; - - # so we're not writing now and we have data to write... - unless ($self->{bufilename}) { - # create a filename and see if it exists or not - $self->{is_writing} = 1; - my $fn = join('-', $self->{service}->name, $self->{service}->listenaddr, "client", $self->{fd}, int(rand(0xffffffff))); - $fn = $self->{service}->{buffer_uploads_path} . 
'/' . $fn; - - # good, now we need to create the file - Perlbal::AIO::aio_open($fn, O_CREAT | O_TRUNC | O_RDWR, 0644, sub { - $self->{is_writing} = 0; - $self->{bufh} = shift; - - # throw errors back to the user - if (! $self->{bufh}) { - Perlbal::log('crit', "Failure to open $fn for buffered upload output"); - return $self->_simple_response(500); - } - - # save state and info and bounce it back to write data - $self->{bufilename} = $fn; - $self->buffered_upload_update; - }); - - return; - } - - # can't proceed if we have no disk file to async write to - # people reported seeing this crash rarely in production... - # must be a race between previously in-flight's write - # re-invoking a write immediately after something triggered - # a buffered upload purge. - unless ($self->{bufh}) { - $self->close; - return; - } - - # at this point, we want to do some writing - my $bref = shift(@{$self->{read_buf}}); - my $len = length $$bref; - $self->{read_ahead} -= $len; - - # so at this point we have a valid filename and file handle and should write out - # the buffer that we have - $self->{is_writing} = 1; - Perlbal::AIO::aio_write($self->{bufh}, $self->{buoutpos}, $len, $$bref, sub { - my $bytes = shift; - $self->{is_writing} = 0; - - # check for error - unless ($bytes > 0) { - Perlbal::log('crit', "Error writing buffered upload: $!. Tried to do $len bytes at $self->{buoutpos}."); - return $self->_simple_response(500); - } - - # update our count of data written - $self->{buoutpos} += $bytes; - - # now check if we wrote less than we had in this chunk of buffer. if that's - # the case then we need to re-enqueue the part of the chunk that wasn't - # written out and update as appropriate. - if ($bytes < $len) { - my $diff = $len - $bytes; - unshift @{$self->{read_buf}}, \ substr($$bref, $bytes, $diff); - $self->{read_ahead} += $diff; - } - - # if we're processing a chunked upload, ... - if ($self->{chunked_upload_state}) { - # turn reads back on, if we haven't hit the end yet. 
- if ($self->{unread_data_waiting} && $self->{read_ahead} < 1024*1024) { - $self->watch_read(1); - $self->{unread_data_waiting} = 0; - } - - if ($self->{read_ahead} == 0 && $self->{chunked_upload_state}->hit_zero_chunk) { - $self->watch_read(0); - $self->send_buffered_upload; - return; - } - } - - # if we're done (no clr and no read ahead!) then send it - elsif ($self->{read_ahead} <= 0 && $self->{content_length_remain} <= 0) { - $self->send_buffered_upload; - return; - } - - # spawn another writer! - $self->buffered_upload_update; - }); -} - -# destroy any files we've created -sub purge_buffered_upload { - my Perlbal::ClientProxy $self = shift; - - # Main reason for failure below is a 0-length chunked upload, where the file is never created. - return unless $self->{bufh}; - - # FIXME: it's reported that sometimes the two now-in-eval blocks - # fail, hence the eval blocks and warnings. the FIXME is to - # figure this out, why it happens sometimes. - - # first close our filehandle... not async - eval { - CORE::close($self->{bufh}); - }; - if ($@) { warn "Error closing file in ClientProxy::purge_buffered_upload: $@\n"; } - - $self->{bufh} = undef; - - eval { - # now asynchronously unlink the file - Perlbal::AIO::aio_unlink($self->{bufilename}, sub { - if ($!) { - # note an error, but whatever, we'll either overwrite the file later (O_TRUNC | O_CREAT) - # or a cleaner will come through and do it for us someday (if the user runs one) - Perlbal::log('warning', "Unable to link $self->{bufilename}: $!"); - } - }); - }; - if ($@) { warn "Error unlinking file in ClientProxy::purge_buffered_upload: $@\n"; } -} - -# returns bool; whether backend should hide the 500 error from the client -# and have us try a new backend. return true to retry, false to get a 500 error. 
-sub should_retry_after_500 { - my Perlbal::ClientProxy $self = shift; - my Perlbal::BackendHTTP $be = shift; - my $svc = $be->{service}; - return 0 unless $svc->{enable_error_retries}; - my @sched = split(/\s*,\s*/, $svc->{error_retry_schedule}); - return 0 if ++$self->{retry_count} > @sched; - return 1; -} - -# called by Backend to tell us it got a 500 error and we should retry another backend. -sub retry_after_500 { - my Perlbal::ClientProxy $self = shift; - my Perlbal::Service $svc = shift; - - my @sched = split(/\s*,\s*/, $svc->{error_retry_schedule}); - my $delay = $sched[$self->{retry_count} - 1]; - - if ($delay) { - Danga::Socket->AddTimer($delay, sub { - return if $self->{closed}; - $self->rerequest_backend; - }); - } else { - $self->rerequest_backend; - } - -} - -sub as_string { - my Perlbal::ClientProxy $self = shift; - - my $ret = $self->SUPER::as_string; - if ($self->{backend}) { - my $ipport = $self->{backend}->{ipport}; - $ret .= "; backend=$ipport"; - } else { - $ret .= "; write_buf_size=$self->{write_buf_size}" - if $self->{write_buf_size} > 0; - } - $ret .= "; highpri" if $self->{high_priority}; - $ret .= "; lowpri" if $self->{low_priority}; - $ret .= "; responded" if $self->{responded}; - $ret .= "; waiting_for=" . 
$self->{content_length_remain} - if defined $self->{content_length_remain}; - $ret .= "; reproxying" if $self->{currently_reproxying}; - - return $ret; -} - -sub set_queue_low { - my Perlbal::ClientProxy $self = shift; - $self->{low_priority} = 1; - return; -} - -sub set_queue_high { - my Perlbal::ClientProxy $self = shift; - $self->{high_priority} = 1; - return; -} - - -sub DESTROY { - Perlbal::objdtor($_[0]); - $_[0]->SUPER::DESTROY; -} - -1; - - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/CommandContext.pm b/lib/mogdeps/Perlbal/CommandContext.pm deleted file mode 100644 index 345d9617..00000000 --- a/lib/mogdeps/Perlbal/CommandContext.pm +++ /dev/null @@ -1,35 +0,0 @@ -# keep track of the surrounding context for a ManageCommand, so commands -# can be less verbose when in config files -# -# Copyright 2005-2007, Six Apart, Ltd. -# - -package Perlbal::CommandContext; -use strict; -use warnings; -no warnings qw(deprecated); - -use fields ( - 'last_created', # the name of the last pool or service created - 'verbose', # scalar bool: verbosity ("OK" on success) - ); - -sub new { - my $class = shift; - my $self = fields::new($class); - return $self; -} - -sub verbose { - my $self = shift; - $self->{verbose} = shift if @_; - $self->{verbose}; -} - -1; - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/HTTPHeaders.pm b/lib/mogdeps/Perlbal/HTTPHeaders.pm deleted file mode 100644 index 8ceb3eae..00000000 --- a/lib/mogdeps/Perlbal/HTTPHeaders.pm +++ /dev/null @@ -1,483 +0,0 @@ -###################################################################### -# HTTP header class (both request and response) -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. 
-# - -package Perlbal::HTTPHeaders; -use strict; -use warnings; -no warnings qw(deprecated); - -use Perlbal; - -use fields ( - 'headers', # href; lowercase header -> comma-sep list of values - 'origcase', # href; lowercase header -> provided case - 'hdorder', # aref; order headers were received (canonical order) - 'method', # scalar; request method (if GET request) - 'uri', # scalar; request URI (if GET request) - 'type', # 'res' or 'req' - 'code', # HTTP response status code - 'codetext', # status text that for response code - 'ver', # version (string) "1.1" - 'vernum', # version (number: major*1000+minor): "1.1" => 1001 - 'responseLine', # first line of HTTP response (if response) - 'requestLine', # first line of HTTP request (if request) - ); - -our $HTTPCode = { - 200 => 'OK', - 204 => 'No Content', - 206 => 'Partial Content', - 301 => 'Permanent Redirect', - 302 => 'Found', - 304 => 'Not Modified', - 400 => 'Bad request', - 403 => 'Forbidden', - 404 => 'Not Found', - 416 => 'Request range not satisfiable', - 500 => 'Internal Server Error', - 501 => 'Not Implemented', - 503 => 'Service Unavailable', -}; - -sub fail { - return undef unless Perlbal::DEBUG >= 1; - - my $reason = shift; - print "HTTP parse failure: $reason\n" if Perlbal::DEBUG >= 1; - return undef; -} - -sub http_code_english { - my Perlbal::HTTPHeaders $self = shift; - if (@_) { - return $HTTPCode->{shift()} || ""; - } else { - return "" unless $self->response_code; - return $HTTPCode->{$self->response_code} || ""; - } -} - -sub new_response { - my Perlbal::HTTPHeaders $self = shift; - $self = fields::new($self) unless ref $self; - - my $code = shift; - $self->{headers} = {}; - $self->{origcase} = {}; - $self->{hdorder} = []; - $self->{method} = undef; - $self->{uri} = undef; - - $self->{responseLine} = "HTTP/1.0 $code " . 
$self->http_code_english($code); - $self->{code} = $code; - $self->{type} = "httpres"; - - Perlbal::objctor($self, $self->{type}); - return $self; -} -*new_response_PERL = \&new_response; - -sub new { - my Perlbal::HTTPHeaders $self = shift; - $self = fields::new($self) unless ref $self; - - my ($hstr_ref, $is_response) = @_; - # hstr: headers as a string ref - # is_response: bool; is HTTP response (as opposed to request). defaults to request. - - my $absoluteURIHost = undef; - - my @lines = split(/\r?\n/, $$hstr_ref); - - $self->{headers} = {}; - $self->{origcase} = {}; - $self->{hdorder} = []; - $self->{method} = undef; - $self->{uri} = undef; - $self->{type} = ($is_response ? "res" : "req"); - Perlbal::objctor($self, $self->{type}); - - # check request line - if ($is_response) { - $self->{responseLine} = (shift @lines) || ""; - - # check for valid response line - return fail("Bogus response line") unless - $self->{responseLine} =~ m!^HTTP\/(\d+)\.(\d+)\s+(\d+)\s+(.*)$!; - - my ($ver_ma, $ver_mi, $code) = ($1, $2, $3); - $self->code($code, $4); - - # version work so we know what version the backend spoke - unless (defined $ver_ma) { - ($ver_ma, $ver_mi) = (0, 9); - } - $self->{ver} = "$ver_ma.$ver_mi"; - $self->{vernum} = $ver_ma*1000 + $ver_mi; - } else { - $self->{requestLine} = (shift @lines) || ""; - - # check for valid request line - return fail("Bogus request line") unless - $self->{requestLine} =~ m!^(\w+) ((?:\*|(?:\S*?)))(?: HTTP/(\d+)\.(\d+))$!; - - $self->{method} = $1; - $self->{uri} = $2; - - my ($ver_ma, $ver_mi) = ($3, $4); - - # now check uri for not being a uri - if ($self->{uri} =~ m!^http://([^/:]+?)(?::\d+)?(/.*)?$!) 
{ - $absoluteURIHost = lc($1); - $self->{uri} = $2 || "/"; # "http://www.foo.com" yields no path, so default to "/" - } - - # default to HTTP/0.9 - unless (defined $ver_ma) { - ($ver_ma, $ver_mi) = (0, 9); - } - - $self->{ver} = "$ver_ma.$ver_mi"; - $self->{vernum} = $ver_ma*1000 + $ver_mi; - } - - my $last_header = undef; - foreach my $line (@lines) { - if ($line =~ /^\s/) { - next unless defined $last_header; - $self->{headers}{$last_header} .= $line; - } elsif ($line =~ /^([^\x00-\x20\x7f()<>@,;:\\\"\/\[\]?={}]+):\s*(.*)$/) { - # RFC 2616: - # sec 4.2: - # message-header = field-name ":" [ field-value ] - # field-name = token - # sec 2.2: - # token = 1* - - $last_header = lc($1); - if (defined $self->{headers}{$last_header}) { - if ($last_header eq "set-cookie") { - # cookie spec doesn't allow merged headers for set-cookie, - # so instead we do this hack so to_string below does the right - # thing without needing to be arrayref-aware or such. also - # this lets client code still modify/delete this data - # (but retrieving the value of "set-cookie" will be broken) - $self->{headers}{$last_header} .= "\r\nSet-Cookie: $2"; - } else { - # normal merged header case (according to spec) - $self->{headers}{$last_header} .= ", $2"; - } - } else { - $self->{headers}{$last_header} = $2; - $self->{origcase}{$last_header} = $1; - push @{$self->{hdorder}}, $last_header; - } - } else { - return fail("unknown header line"); - } - } - - # override the host header if an absolute URI was provided - $self->header('Host', $absoluteURIHost) - if defined $absoluteURIHost; - - # now error if no host - return fail("HTTP 1.1 requires host header") - if !$is_response && $self->{vernum} >= 1001 && !$self->header('Host'); - - return $self; -} -*new_PERL = \&new; - -sub _codetext { - my Perlbal::HTTPHeaders $self = shift; - return $self->{codetext} if $self->{codetext}; - return $self->http_code_english; -} - -sub code { - my Perlbal::HTTPHeaders $self = shift; - my ($code, $text) = @_; - 
$self->{codetext} = $text; - if (! defined $self->{code} || $code != $self->{code}) { - $self->{code} = $code+0; - if ($self->{responseLine}) { - $self->{responseLine} = "HTTP/1.0 $code " . $self->http_code_english; - } - } -} - -sub response_code { - my Perlbal::HTTPHeaders $self = $_[0]; - return $self->{code}; -} - -sub request_method { - my Perlbal::HTTPHeaders $self = shift; - return $self->{method}; -} - -sub request_uri { - my Perlbal::HTTPHeaders $self = shift; - return $self->{uri}; -} - -sub set_request_uri { - my Perlbal::HTTPHeaders $self = shift; - return unless $self->{requestLine}; - - my $uri = shift; - - return unless defined $uri and length $uri; - - my $ver = $self->{ver}; - - if ($ver == 0.9) { - $self->{requestLine} = sprintf("%s %s", $self->{method}, $uri); - } else { - $self->{requestLine} = sprintf("%s %s HTTP/%s", $self->{method}, $uri, $ver); - } - - return $self->{uri} = $uri; -} - -sub version_number { - my Perlbal::HTTPHeaders $self = $_[0]; - return $self->{vernum} unless $_[1]; - return $self->{vernum} = $_[1]; -} - -sub header { - my Perlbal::HTTPHeaders $self = shift; - my $key = shift; - return $self->{headers}{lc($key)} unless @_; - - # adding a new header - my $origcase = $key; - $key = lc($key); - unless (exists $self->{headers}{$key}) { - push @{$self->{hdorder}}, $key; - $self->{origcase}{$key} = $origcase; - } - - return $self->{headers}{$key} = shift; -} - -sub headers_list { - my Perlbal::HTTPHeaders $self = shift; - return [$self->{headers} ? 
keys %{ $self->{headers} } : ()]; -} - -sub to_string_ref { - my Perlbal::HTTPHeaders $self = shift; - my $st = join("\r\n", - $self->{requestLine} || $self->{responseLine}, - (map { "$self->{origcase}{$_}: $self->{headers}{$_}" } - grep { defined $self->{headers}{$_} } - @{$self->{hdorder}}), - '', ''); # final \r\n\r\n - return \$st; -} - -sub clone { - my Perlbal::HTTPHeaders $self = shift; - my $new = fields::new($self); - foreach (qw(method uri type code codetext ver vernum responseLine requestLine)) { - $new->{$_} = $self->{$_}; - } - - # mark this object as constructed - Perlbal::objctor($new, $new->{type}); - - $new->{headers} = { %{$self->{headers}} }; - $new->{origcase} = { %{$self->{origcase}} }; - $new->{hdorder} = [ @{$self->{hdorder}} ]; - return $new; -} - -sub set_version { - my Perlbal::HTTPHeaders $self = shift; - my $ver = shift; - - die "Bogus version" unless $ver =~ /^(\d+)\.(\d+)$/; - my ($ver_ma, $ver_mi) = ($1, $2); - - # check for req, as the other can be res or httpres - if ($self->{type} eq 'req') { - $self->{requestLine} = "$self->{method} $self->{uri} HTTP/$ver"; - } else { - $self->{responseLine} = "HTTP/$ver $self->{code} " . $self->_codetext; - } - $self->{ver} = "$ver_ma.$ver_mi"; - $self->{vernum} = $ver_ma*1000 + $ver_mi; - return $self; -} - -# using all available information, attempt to determine the content length of -# the message body being sent to us. -sub content_length { - my Perlbal::HTTPHeaders $self = shift; - - # shortcuts depending on our method/code, depending on what we are - if ($self->{type} eq 'req') { - # no content length for head requests - return 0 if $self->{method} eq 'HEAD'; - } elsif ($self->{type} eq 'res' || $self->{type} eq 'httpres') { - # no content length in any of these - if ($self->{code} == 304 || $self->{code} == 204 || - ($self->{code} >= 100 && $self->{code} <= 199)) { - return 0; - } - } - - # the normal case for a GET/POST, etc. 
real data coming back - # also, an OPTIONS requests generally has a defined but 0 content-length - if (defined(my $clen = $self->header("Content-Length"))) { - return $clen; - } - - # if we get here, nothing matched, so we don't definitively know what the - # content length is. this is usually an error, but we try to work around it. - return undef; -} - -# answers the question: "should a response to this person specify keep-alive, -# given the request (self) and the backend response?" this is used in proxy -# mode to determine based on the client's request and the backend's response -# whether or not the response from the proxy (us) should do keep-alive. -# -# FIXME: this is called too often (especially with service selector), -# and should be redesigned to be simpler, and/or cached on the -# connection. there's too much duplication with res_keep_alive. -sub req_keep_alive { - my Perlbal::HTTPHeaders $self = $_[0]; - my Perlbal::HTTPHeaders $res = $_[1] or Carp::confess("ASSERT: No response headers given"); - - # get the connection header now (saves warnings later) - my $conn = lc ($self->header('Connection') || ''); - - # check the client - if ($self->version_number < 1001) { - # they must specify a keep-alive header - return 0 unless $conn =~ /\bkeep-alive\b/i; - } - - # so it must be 1.1 which means keep-alive is on, unless they say not to - return 0 if $conn =~ /\bclose\b/i; - - # if we get here, the user wants keep-alive and seems to support it, - # so we make sure that the response is in a form that we can understand - # well enough to do keep-alive. FIXME: support chunked encoding in the - # future, which means this check changes. - return 1 if defined $res->header('Content-length') || - $res->response_code == 304 || # not modified - $res->response_code == 204 || # no content - $self->request_method eq 'HEAD'; - - # fail-safe, no keep-alive - return 0; -} - -# if an options response from a backend looks like it can do keep-alive. 
-sub res_keep_alive_options { - my Perlbal::HTTPHeaders $self = $_[0]; - return $self->res_keep_alive(undef, 1); -} - -# answers the question: "is the backend expected to stay open?" this -# is a combination of the request we sent to it and the response they -# sent... - -# FIXME: this is called too often (especially with service selector), -# and should be redesigned to be simpler, and/or cached on the -# connection. there's too much duplication with req_keep_alive. -sub res_keep_alive { - my Perlbal::HTTPHeaders $self = $_[0]; - my Perlbal::HTTPHeaders $req = $_[1]; - my $is_options = $_[2]; - Carp::confess("ASSERT: No request headers given") unless $req || $is_options; - - # get the connection header now (saves warnings later) - my $conn = lc ($self->header('Connection') || ''); - - # if they said Connection: close, it's always not keep-alive - return 0 if $conn =~ /\bclose\b/i; - - # handle the http 1.0/0.9 case which requires keep-alive specified - if ($self->version_number < 1001) { - # must specify keep-alive, and must have a content length OR - # the request must be a head request - return 1 if - $conn =~ /\bkeep-alive\b/i && - ($is_options || - defined $self->header('Content-length') || - $req->request_method eq 'HEAD' || - $self->response_code == 304 || # not modified - $self->response_code == 204 - ); # no content - - return 0; - } - - # HTTP/1.1 case. defaults to keep-alive, per spec, unless - # asked for otherwise (checked above) - # FIXME: make sure we handle a HTTP/1.1 response from backend - # with connection: close, no content-length, going to a - # HTTP/1.1 persistent client. we'll have to add chunk markers. - # (not here, obviously) - return 1; -} - -# returns (status, range_start, range_end) when given a size -# status = 200 - invalid or non-existent range header. serve normally. -# status = 206 - parseable range is good. serve partial content. 
-# status = 416 - Range is unsatisfiable -sub range { - my Perlbal::HTTPHeaders $self = $_[0]; - my $size = $_[1]; - - my $not_satisfiable; - my $range = $self->header("Range"); - - return 200 unless - $range && - defined $size && - $range =~ /^bytes=(\d*)-(\d*)$/; - - my ($range_start, $range_end) = ($1, $2); - - undef $range_start if $range_start eq ''; - undef $range_end if $range_end eq ''; - return 200 unless defined($range_start) or defined($range_end); - - if (defined($range_start) and defined($range_end) and $range_start > $range_end) { - return 416; - } elsif (not defined($range_start) and defined($range_end) and $range_end == 0) { - return 416; - } elsif (defined($range_start) and $size <= $range_start) { - return 416; - } - - $range_start = 0 unless defined($range_start); - $range_end = $size - 1 unless defined($range_end) and $range_end < $size; - - return (206, $range_start, $range_end); -} - - -sub DESTROY { - my Perlbal::HTTPHeaders $self = shift; - Perlbal::objdtor($self, $self->{type}); -} - -1; - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/ManageCommand.pm b/lib/mogdeps/Perlbal/ManageCommand.pm deleted file mode 100644 index 21531eb0..00000000 --- a/lib/mogdeps/Perlbal/ManageCommand.pm +++ /dev/null @@ -1,104 +0,0 @@ -# class representing a one-liner management command. all the responses -# to a command should be done through this instance (out, err, ok, etc) -# -# Copyright 2005-2007, Six Apart, Ltd. 
-# - -package Perlbal::ManageCommand; -use strict; -use warnings; -no warnings qw(deprecated); - -use fields ( - 'base', # the base command name (like "proc") - 'cmd', - 'ok', - 'err', - 'out', - 'orig', - 'argn', - 'ctx', - ); - -sub new { - my ($class, $base, $cmd, $out, $ok, $err, $orig, $ctx) = @_; - my $self = fields::new($class); - - $self->{base} = $base; - $self->{cmd} = $cmd; - $self->{ok} = $ok; - $self->{err} = $err; - $self->{out} = $out; - $self->{orig} = $orig; - $self->{ctx} = $ctx; - $self->{argn} = []; - return $self; -} - -# returns an managecommand object for functions that need one, but -# this does nothing but explode if there any problems. -sub loud_crasher { - use Carp qw(confess); - __PACKAGE__->new(undef, undef, sub {}, sub {}, sub { confess "MC:err: @_" }, "", Perlbal::CommandContext->new); -} - -sub out { my $mc = shift; return @_ ? $mc->{out}->(@_) : $mc->{out}; } -sub ok { my $mc = shift; return $mc->{ok}->(@_); } - -sub err { - my ($mc, $err) = @_; - $err =~ s/\n$//; - $mc->{err}->($err); -} - -sub cmd { my $mc = shift; return $mc->{cmd}; } -sub orig { my $mc = shift; return $mc->{orig}; } -sub end { my $mc = shift; $mc->{out}->("."); 1; } - -sub parse { - my $mc = shift; - my $regexp = shift; - my $usage = shift; - - my @ret = ($mc->{cmd} =~ /$regexp/); - $mc->parse_error($usage) unless @ret; - - my $i = 0; - foreach (@ret) { - $mc->{argn}[$i++] = $_; - } - return $mc; -} - -sub arg { - my $mc = shift; - my $n = shift; # 1-based array, to correspond with $1, $2, $3 - return $mc->{argn}[$n - 1]; -} - -sub args { - my $mc = shift; - return @{$mc->{argn}}; -} - -sub parse_error { - my $mc = shift; - my $usage = shift; - $usage .= "\n" if $usage && $usage !~ /\n$/; - die $usage || "Invalid syntax to '$mc->{base}' command\n" -} - -sub no_opts { - my $mc = shift; - die "The '$mc->{base}' command takes no arguments\n" - unless $mc->{cmd} eq $mc->{base}; - return $mc; -} - -1; - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# 
indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/Plugin/AccessControl.pm b/lib/mogdeps/Perlbal/Plugin/AccessControl.pm deleted file mode 100644 index a13ad64f..00000000 --- a/lib/mogdeps/Perlbal/Plugin/AccessControl.pm +++ /dev/null @@ -1,201 +0,0 @@ -package Perlbal::Plugin::AccessControl; - -use Perlbal; -use strict; -use warnings; -no warnings qw(deprecated); - -# commands like: -# -# what to do if we fall off the rule chain: -# ACCESS POLICY {ALLOW,DENY} -# -# adding things to the rule chain. processing stops once any rule is matched. -# -# ACCESS {ALLOW,DENY} netmask 127.0.0.1/8 -# ACCESS {ALLOW,DENY} ip 127.0.0.1 -# also can make a match set the request to go into the low-priority perlbal queue: -# ACCESS QUEUE_LOW ip 127.0.0.1 - -# reset the rule chain and policy: (policy is allow by default) -# ACCESS RESET - -# Future: -# access {allow,deny} forwarded_ip 127.0.0.1 -# access {allow,deny} method [,]* -# access {allow,deny} forwarded_netmask 127.0.0.1/24 - -sub load { - my $class = shift; - - Perlbal::register_global_hook('manage_command.access', sub { - my $mc = shift->parse(qr/^access\s+ - (policy|allow|deny|reset|queue_low) # cmd - (?:\s+(\S+))? # arg1 - (?:\s+(\S+))? 
# optional arg2 - $/x, - "usage: ACCESS []"); - my ($cmd, $arg1, $arg2) = $mc->args; - - my $svcname; - unless ($svcname ||= $mc->{ctx}{last_created}) { - return $mc->err("No service name in context from CREATE SERVICE or USE "); - } - - my $ss = Perlbal->service($svcname); - return $mc->err("Non-existent service '$svcname'") unless $ss; - - my $cfg = $ss->{extra_config}->{_access} ||= {}; - - if ($cmd eq "reset") { - $ss->{extra_config}->{_access} = {}; - return $mc->ok; - } - - if ($cmd eq "policy") { - return $mc->err("policy must be 'allow' or 'deny'") unless - $arg1 =~ /^allow|deny$/; - $cfg->{deny_default} = $arg1 eq "deny"; - return $mc->ok; - } - - if ($cmd eq "allow" || $cmd eq "deny" || $cmd eq "queue_low") { - my ($what, $val) = ($arg1, $arg2); - return $mc->err("Unknown item to $cmd: '$what'") unless - $what && ($what eq "ip" || $what eq "netmask"); - - if ($what eq "netmask") { - return $mc->err("Net::Netmask not installed") - unless eval { require Net::Netmask; 1; }; - - $val = eval { Net::Netmask->new2($val) }; - return $mc->err("Error parsing netmask") unless $val; - } - - my $rules = $cfg->{rules} ||= []; - push @$rules, [ $cmd, $what, $val ]; - return $mc->ok; - } - - return $mc->err("can't get here"); - }); - - return 1; -} - -# unload our global commands, clear our service object -sub unload { - my $class = shift; - Perlbal::unregister_global_hook('manage_command.access'); - return 1; -} - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - - my $use_observed_ip; - - $svc->register_hook('AccessControl', 'start_http_request', sub { - my Perlbal::ClientHTTPBase $client = shift; - my Perlbal::HTTPHeaders $hds = $client->{req_headers}; - return 0 unless $hds; - my $uri = $hds->request_uri; - - my $allow = sub { 0; }; - my $deny = sub { - $client->send_response(403, "Access denied."); - return 1; - }; - - my $queue_low = sub { - $client->set_queue_low; - return 0; - }; - - my $rule_action = sub { - my $rule = 
shift; - if ($rule->[0] eq "deny") { - return $deny->(); - } elsif ($rule->[0] eq "allow") { - return $allow->(); - } elsif ($rule->[0] eq "queue_low") { - return $queue_low->(); - } - }; - - my $match = sub { - my $rule = shift; - if ($rule->[1] eq "ip") { - my $peer_ip; - $peer_ip = $client->observed_ip_string if $use_observed_ip; - $peer_ip ||= $client->peer_ip_string; - - return $peer_ip eq $rule->[2]; - } - - if ($rule->[1] eq "netmask") { - my $peer_ip; - $peer_ip = $client->observed_ip_string if $use_observed_ip; - $peer_ip ||= $client->peer_ip_string; - - return eval { $rule->[2]->match($peer_ip); }; - } - }; - - my $cfg = $svc->{extra_config}->{_access} ||= {}; - my $rules = $cfg->{rules} || []; - foreach my $rule (@$rules) { - next unless $match->($rule); - return $rule_action->($rule) - } - - return $deny->() if $cfg->{deny_default}; - return $allow->(); - }); - - # Allow AccessControl users to specify that they would like to use the observed IP as - # opposed to the real IP for ACL checking. - $svc->register_setter('AccessControl', 'use_observed_ip', sub { - my ($out, $what, $val) = @_; - return 0 unless $what; - - $use_observed_ip = $val; - - $out->("OK") if $out; - - return 1; - }); - - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - return 1; -} - -sub dumpconfig { - my ($class, $svc) = @_; - - my @return; - - my $cfg = $svc->{extra_config}->{_access} ||= {}; - my $rules = $cfg->{rules} || []; - - foreach my $rule (@$rules) { - my $action = uc $rule->[0]; - my $type = uc $rule->[1]; - my $value = $rule->[2]; - push @return, "ACCESS $action $type $value"; - } - - my $default_action = $cfg->{deny_default} ? 
"DENY" : "ALLOW"; - push @return, "ACCESS POLICY $default_action"; - - return @return; -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/AtomInject.pm b/lib/mogdeps/Perlbal/Plugin/AtomInject.pm deleted file mode 100644 index c5bc1aa9..00000000 --- a/lib/mogdeps/Perlbal/Plugin/AtomInject.pm +++ /dev/null @@ -1,65 +0,0 @@ -package Perlbal::Plugin::AtomInject; - -use Perlbal; -use strict; -use warnings; - -our @subs; # subscribers - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - - $svc->{enable_put} = 1; - - $svc->register_hook('AtomInject', 'handle_put', sub { - my Perlbal::ClientHTTP $self = shift; - my Perlbal::HTTPHeaders $hds = $self->{req_headers}; - return 0 unless $hds; - - return $self->send_response(400, "Invalid method") - unless $hds->request_method eq "PUT"; - - my $uri = $hds->request_uri; - return $self->send_response(400, "Invalid uri") unless $uri =~ /^\//; - $self->{scratch}{path} = $uri; - - # now abort the normal handle_put processing... 
- return 1; - }); - - $svc->register_hook('AtomInject', 'put_writeout', sub { - my Perlbal::ClientHTTP $self = shift; - return 1 if $self->{content_length_remain}; - - my $data = join("", map { $$_ } @{$self->{read_buf}}); - - # reset our input buffer - $self->{read_buf} = []; - $self->{read_ahead} = 0; - - my $rv = eval { Perlbal::Plugin::AtomStream->InjectFeed(\$data, $self->{scratch}{path}); }; - return $self->send_response(200); - }); - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - return 1; -} - -# called when we are loaded -sub load { - return 1; -} - -# called for a global unload -sub unload { - return 1; -} - - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/AtomStream.pm b/lib/mogdeps/Perlbal/Plugin/AtomStream.pm deleted file mode 100644 index 5c45b9d7..00000000 --- a/lib/mogdeps/Perlbal/Plugin/AtomStream.pm +++ /dev/null @@ -1,138 +0,0 @@ -package Perlbal::Plugin::AtomStream; - -use URI; - -use Perlbal; -use strict; -use warnings; - -our @subs; # subscribers -our @recent; # recent items in format [$epoch, $atom_ref, $path_segments_arrayref] - -our $last_timestamp = 0; - -use constant MAX_LAG => 262144; - -sub InjectFeed { - my $class = shift; - my ($atomref, $path) = @_; - - # maintain queue of last 60 seconds worth of posts - my $now = time(); - my @put_segments = URI->new($path)->path_segments; - push @recent, [ $now, $atomref, \@put_segments ]; - shift @recent while @recent && $recent[0][0] <= $now - 60; - - emit_timestamp($now) if $now > $last_timestamp; - - my $need_clean = 0; - foreach my $s (@subs) { - if ($s->{closed}) { - $need_clean = 1; - next; - } - - next unless filter(\@put_segments, $s->{scratch}{get_segments}); - - my $lag = $s->{write_buf_size}; - - if ($lag > MAX_LAG) { - $s->{scratch}{skipped_atom}++; - } else { - if (my $skip_count = $s->{scratch}{skipped_atom}) { - $s->{scratch}{skipped_atom} = 0; - $s->write(\ "\n"); - } - $s->watch_write(0) if $s->write($atomref); - } - 
} - - if ($need_clean) { - @subs = grep { ! $_->{closed} } @subs; - } -} - -sub emit_timestamp { - my $time = shift; - $last_timestamp = $time; - foreach my $s (@subs) { - next if $s->{closed}; - $s->{alive_time} = $time; - $s->write(\ "\n"); - } -} - -sub filter { - my ($put, $get) = @_; - return 0 if scalar @$put < scalar @$get; - for( my $i = 0 ; $i < scalar @$get ; $i++) { - return 0 if $put->[$i] ne $get->[$i]; - } - return 1; -} - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - - Perlbal::Socket::register_callback(1, sub { - my $now = time(); - emit_timestamp($now) if $now > $last_timestamp; - return 1; - }); - - $svc->register_hook('AtomStream', 'start_http_request', sub { - my Perlbal::ClientProxy $self = shift; - my Perlbal::HTTPHeaders $hds = $self->{req_headers}; - return 0 unless $hds; - my $uri = URI->new($hds->request_uri); - my @get_segments = $uri->path_segments; - $self->{scratch}{get_segments} = \@get_segments; - return 0 unless pop @get_segments eq 'atom-stream.xml'; - my %params = $uri->query_form; - my $since = $params{since} =~ /\d+/ ? 
$params{since} : 0; - - my $res = $self->{res_headers} = Perlbal::HTTPHeaders->new_response(200); - $res->header("Content-Type", "text/xml"); - $res->header('Connection', 'close'); - - push @subs, $self; - - $self->write($res->to_string_ref); - - my $last_rv = $self->write(\ "\n\n"); - - # if they'd like a playback, give them all items >= time requested - if ($since) { - foreach my $item (@recent) { - next if $item->[0] < $since; - next unless filter($item->[2], \@get_segments); - $last_rv = $self->write($item->[1]); - } - } - - $self->watch_write(0) if $last_rv; - return 1; - }); - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - - return 1; -} - -# called when we are loaded -sub load { - return 1; -} - -# called for a global unload -sub unload { - return 1; -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/AutoRemoveLeadingDir.pm b/lib/mogdeps/Perlbal/Plugin/AutoRemoveLeadingDir.pm deleted file mode 100644 index 2d844227..00000000 --- a/lib/mogdeps/Perlbal/Plugin/AutoRemoveLeadingDir.pm +++ /dev/null @@ -1,51 +0,0 @@ -package Perlbal::Plugin::AutoRemoveLeadingDir; - -# -# this plugin auto-removes a leading directory path component -# in the URL, if it's the name of the directory the webserver -# is rooted at. -# -# if docroot = /home/lj/htdocs/stc/ -# -# and user requests: -# -# /stc/img/foo.jpg -# -# Then this plugin will treat that as if it's a request for /img/foo.jpg. -# -# This is useful for css/js/etc to have an "absolute" pathname for -# peer resources (think css having url(/stc/foo.jpg)) that can be served -# from either a separate hostname (stat.livejournal.com) and using a CDN, -# or from www. when cross-domain js restrictions require it. 
- -use Perlbal; -use strict; -use warnings; - -sub load { 1 } -sub unload { 1 } - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - - $svc->register_hook('AutoRemoveLeadingDir', 'start_serve_request', sub { - my Perlbal::ClientHTTPBase $client = shift; - my $uriref = shift; - - my Perlbal::Service $svc = $client->{service}; - my ($tail) = ($svc->{docroot} =~ m!/([\w-]+)/?$!); - $$uriref =~ s!^/$tail!! if $tail; - return 0; - }); - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - return 1; -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/Cgilike.pm b/lib/mogdeps/Perlbal/Plugin/Cgilike.pm deleted file mode 100644 index e841ff9f..00000000 --- a/lib/mogdeps/Perlbal/Plugin/Cgilike.pm +++ /dev/null @@ -1,343 +0,0 @@ -#!/usr/bin/perl -# -# Copyright 2007 Martin Atkins and Six Apart Ltd. -# - -=head1 NAME - -Perlbal::Plugin::Cgilike - Handle Perlbal requests with a Perl subroutine - -=head1 DESCRIPTION - -This module allows responses to be handled with a simple API that's similar in principle to -CGI, mod_perl response handlers, etc. - -It does not, however, come anywhere close to conforming to the CGI "standard". It's actually -more like mod_perl in usage, though there are several differences. -Most notably, Perlbal is single-process and single-threaded, and handlers run inside the Perlbal -process and must therefore return quickly and not do any blocking operations. - -As it currently stands, this is very bare-bones and has only really been used with basic GET -requests. It lacks a nice API for handling the body of a POST or PUT request. - -It is not recommended to use this for extensive applications. Perlbal is first and foremost -a load balancer, so if you're doing something at all complicated you're probably better off -using something like Apache mod_perl and then putting Perlbal in front if it if necessary. 
-However, this plugin may prove useful for simple handlers or perhaps embedding a simple -HTTP service into another application that uses C. - -=head1 SYNOPSIS - -This module provides a Perlbal plugin which can be loaded and used as follows. - - LOAD cgilike - PERLREQUIRE = MyPackage - - CREATE SERVICE cgilike - SET role = web_server - SET listen = 127.0.0.1:80 - SET plugins = cgilike - PERLHANDLER = MyPackage::handler - ENABLE cgilike - -With this plugin loaded into a particular service, the plugin will then be called for -all requests for that service. - -Set cgilike.handler to the name of a subroutine that will handle requests. This subroutine -will receive an object which allows interaction with the Perlbal service. - - package MyPackage - sub handler { - my ($r) = @_; - if ($r->uri eq '/') { - print "

Hello, world

"; - return Perlbal::Plugin::Cgilike::HANDLED; - } - else { - return 404; - } - } - -Return C to indicate that the request has been handled, or return some HTTP error code -to produce a predefined error message. -You may also return C if you do not wish to handle the request, in which case Perlbal -will be allowed to handle the request in whatever way it would have done without Cgilike loaded. - -If your handler returns any non-success value, it B produce any output. If you -produce output before returning such a value, the response to the client is likely to be -utter nonsense. - -You may also return C, which is equivalent to -returning zero except that the HTTP connection will be left open once you return. It is -your responsibility to later call C<$r-Eend_response()> when you have completed -the response. This style is necessary when you need to perform some long operation -before you can return a response; you'll need to use some appropriate method to set -a callback to run when the operation completes and then do your response in the -callback. Once you've called C, you must not call any further methods on C<$r>; -it's probably safest to just return immediately afterwards to avoid any mishaps. - -=head1 API DOCUMENTATION - -TODO: Write this - -=head1 TODO - -Currently there is no API for dealing with the body of a POST or PUT request. Ideally it'd be able -to do automatic decoding of application/x-www-form-urlencoded data, too. - -The POSTPONE_RESPONSE functionality has not been tested extensively and is probably buggy. - -=head1 COPYRIGHT AND LICENSE - -Copyright 2007 Martin Atkins and Six Apart Ltd. - -This module is part of the Perlbal distribution, and as such can be distributed under the same licence terms as the rest of Perlbal. 
- -=cut - -package Perlbal::Plugin::Cgilike; - -use Perlbal; -use strict; -use Symbol; - -use constant DECLINED => -2; -use constant HANDLED => 0; -use constant POSTPONE_RESPONSE => -1; - -sub register { - my ($class, $svc) = @_; - - $svc->register_hook('Cgilike', 'start_http_request', sub { Perlbal::Plugin::Cgilike::handle_request($svc, $_[0]); }); - -} - -sub handle_request { - my Perlbal::Service $svc = shift; - my Perlbal::ClientProxy $pb = shift; - return 0 unless $pb->{req_headers}; - - # Create a new request object, and tie a filehandle to it - my $output_handle = Symbol::gensym(); - my $req = tie(*{$output_handle}, 'Perlbal::Plugin::Cgilike::Request', $pb); - - my $handler = $svc->{extra_config}->{_perlhandler}; - if (! defined($handler)) { - return $pb->send_response(500, "No perlhandler is configured for this service"); - } - - # Our $output_handle is tied to the request object, which provides PRINT and PRINTF methods - # Set it as the default so that handlers can just use print and printf as normal. - my $oldfh = select($output_handle); - - my $ret; - my $result = eval { - no strict; - $ret = &{$handler}($req); - 1; - }; - - # Restore the old filehandle to avoid breaking anyone else - select($oldfh); - - if ($result) { - if ($ret == 0 || $ret == POSTPONE_RESPONSE) { - if ($ret == 0) { - $req->end_response(); - untie($req); - } - return 1; - } - elsif ($ret == DECLINED) { - return 0; - } - else { - return $pb->send_response($ret+0, $ret+0); - } - } - else { - return $pb->send_response(500, "Error in handler: ".$@); - } - - return $pb->send_response(500, "I seem to have fallen into a place I shouldn't be."); - -} - -sub handle_perlrequire_command { - # This is defined with an equals because Perlbal lowercases all manage commands except - # after an equals, which means that having an equals here is the only way to actually - # get the correct case of the module name. Lame++. 
- my $mc = shift->parse(qr/^perlrequire\s*=\s*([\w:]+)$/, "usage: PERLREQUIRE="); - my ($module) = $mc->args; - - my $success = eval "require $module; 1;"; - - unless ($success) { - return $mc->err("Failed to load $module: $@") - } - - return 1; -} - -sub handle_perlhandler_command { - my $mc = shift->parse(qr/^perlhandler\s*=\s*([\w:]+)$/, "usage: PERLHANDLER="); - my ($subname) = $mc->args; - - my $svcname; - unless ($svcname ||= $mc->{ctx}{last_created}) { - return $mc->err("No service name in context from CREATE SERVICE or USE "); - } - - my $svc = Perlbal->service($svcname); - return $mc->err("Non-existent service '$svcname'") unless $svc; - - my $cfg = $svc->{extra_config}->{_perlhandler} = $subname; - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - - $svc->unregister_hooks('Cgilike'); - return 1; -} - -# called when we are loaded -sub load { - Perlbal::register_global_hook('manage_command.perlrequire', \&Perlbal::Plugin::Cgilike::handle_perlrequire_command); - Perlbal::register_global_hook('manage_command.perlhandler', \&Perlbal::Plugin::Cgilike::handle_perlhandler_command); - - return 1; -} - -# called for a global unload -sub unload { - return 1; -} - -package Perlbal::Plugin::Cgilike::Request; - -use URI; - -sub new { - my ($class, $pb) = @_; - - return bless { - pb => $pb, - header_sent => 0, - }, $class; -} - -# This class can also provide a tied handle which supports PRINT and PRINTF (but not much else) -sub TIEHANDLE { - my ($class, $req_headers) = @_; - return $class->new($req_headers); -} - -sub request_header { - return $_[0]->{pb}->{req_headers}; -} - -sub response_header { - my ($self, $k, $v) = @_; - - if (defined($k)) { - my $hdrs = $self->response_header; - if (defined($v)) { - $hdrs->header($k => $v); - return $v; - } - else { - return $hdrs->header($k); - } - } - else { - if (defined($self->{response_headers})) { - return $self->{response_headers}; - } - else { - return 
$self->{response_headers} = Perlbal::HTTPHeaders->new_response(200); - } - } -} - -sub response_status_code { - my ($self, $value) = @_; - - my $res = $self->response_header; - if (defined($value)) { - $res->code($value); - } - - return $res->response_code; -} - -sub uri { - my ($self) = @_; - return $self->{uri} ? $self->{uri} : $self->{uri} = URI->new($self->request_header->request_uri); -} - -sub path { - my ($self) = @_; - return $self->uri->path; -} - -sub path_segments { - my ($self) = @_; - my @segments = $self->uri->path_segments; - shift @segments; # Get rid of the empty segment at the start - return @segments; -} - -sub query_string { - my ($self) = @_; - return $self->uri->query; -} - -sub query_args { - my ($self) = @_; - return $self->uri->query_form; -} - -sub method { - my ($self) = @_; - return $self->request_header->request_method; -} - -sub send_response_header { - my ($self) = @_; - $self->response_header('Content-type' => 'text/html') unless $self->response_header('Content-type'); - $self->{pb}->write($self->response_header->to_string_ref); - $self->{header_sent} = 1; -} - -sub response_header_sent { - return $_[0]->{header_sent} ? 1 : 0; -} - -sub PRINT { - my ($self, @stuff) = @_; - $self->print(@stuff); -} - -sub PRINTF { - my ($self, $format, @stuff) = @_; - $self->print(sprintf($format, @stuff)); -} - -sub print { - my ($self, @stuff) = @_; - if (! 
$self->response_header_sent) { - $self->send_response_header(); - } - $self->{pb}->write(join("", @stuff)); -} - -sub end_response { - my ($self) = @_; - $self->{pb}->write(sub { $self->{pb}->http_response_sent; }); -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/EchoService.pm b/lib/mogdeps/Perlbal/Plugin/EchoService.pm deleted file mode 100644 index 3f6f1f7a..00000000 --- a/lib/mogdeps/Perlbal/Plugin/EchoService.pm +++ /dev/null @@ -1,123 +0,0 @@ -########################################################################### -# simple plugin demonstrating how to create an add-on service for Perlbal -# using the plugin infrastructure -########################################################################### - -package Perlbal::Plugin::EchoService; - -use strict; -use warnings; - -# on load we need to define the service and any parameters we want -sub load { - - # define the echo service, which instantiates this type of object - Perlbal::Service::add_role( - echo => \&Perlbal::Plugin::EchoService::Client::new, - ); - - # add up custom configuration options that people are allowed to set on the echo_service - Perlbal::Service::add_tunable( - # allow the following: - # SET myservice.echo_delay = 5 - # defines how long to wait between getting text and echoing it back - echo_delay => { - des => "Time in seconds to pause before sending text back using the echo service.", - default => 0, # no delay - check_role => "echo", - check_type => "int", - } - ); - - return 1; -} - -# remove the various things we've hooked into, this is required as a way of -# being good to the system... -sub unload { - Perlbal::Service::remove_tunable('echo_delay'); - Perlbal::Service::remove_role('echo'); - return 1; -} - - -########################################################################### -# this is the implementation of the client that gets instantiated by the -# service. 
(which is really all a service does - instantiate the right -# type of client, and store some information) -########################################################################### - -package Perlbal::Plugin::EchoService::Client; -use strict; -use warnings; - -use base "Perlbal::Socket"; -use fields ('service', # the service we're from - 'buf'); # the buffer of what we're reading - -# create a new object of this class -sub new { - my $class = "Perlbal::Plugin::EchoService::Client"; - my ($service, $sock) = @_; - my $self = fields::new($class); - $self->SUPER::new($sock); - $self->{service} = $service; - $self->{buf} = ""; # what we've read so far, not forming a complete line - - $self->watch_read(1); - return $self; -} - -# called when we are readable - i.e. there is data available -sub event_read { - my Perlbal::Plugin::EchoService::Client $self = shift; - - # try to read in 1k of data, remember to close if you get undef, as that means - # something went wrong, or the socket was closed - my $bref = $self->read(1024); - return $self->close() unless defined $bref; - $self->{buf} .= $$bref; - - # now, parse out any lines that we have gotten. this just removes data line by - # line so we can handle it. - while ($self->{buf} =~ s/^(.+?)\r?\n//) { - my $line = $1; - - # package up a sub to do what we want. this is in a coderef because we either - # need to call it now or schedule it for later. saves some duplication. - my $do_echo = sub { $self->write("$line\r\n"); }; - - # if they want a delay, we have to schedule this for later - if (my $delay = $self->{service}->{extra_config}->{echo_delay}) { - # schedule - Danga::Socket->AddTimer($delay, $do_echo); - - } else { - # immediately, so run it - $do_echo->(); - - } - } -} - -# called when we are writeable - that is, we are allowed to write data now. try to -# flush any existing data and then if we have nothing in the write buffer left, -# go ahead and stop notifying us about writeability. 
-sub event_write { - my Perlbal::Plugin::EchoService::Client $self = shift; - $self->watch_write(0) if $self->write(undef); -} - -# if we run into some socket error, just close -sub event_err { - my Perlbal::Plugin::EchoService::Client $self = shift; - $self->close; -} - -# same thing if we get a hup -sub event_hup { - my Perlbal::Plugin::EchoService::Client $self = shift; - $self->close; -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/Highpri.pm b/lib/mogdeps/Perlbal/Plugin/Highpri.pm deleted file mode 100644 index 0c269681..00000000 --- a/lib/mogdeps/Perlbal/Plugin/Highpri.pm +++ /dev/null @@ -1,125 +0,0 @@ -########################################################################### -# plugin that makes some requests high priority. this is very LiveJournal -# specific, as this makes requests to the client protocol be treated as -# high priority requests. -########################################################################### - -package Perlbal::Plugin::Highpri; - -use strict; -use warnings; - -# keep track of services we're loaded for -our %Services; - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - - # create a compiled regexp for very frequent use later - my $uri_check = qr{^(?:/interface/(?:xmlrpc|flat)|/login\.bml)$}; - my $host_check = undef; - - # setup default extra config info - $svc->{extra_config}->{highpri_uri_check_str} = '^(?:/interface/(?:xmlrpc|flat)|/login\.bml)$'; - $svc->{extra_config}->{highpri_host_check_str} = 'undef'; - - # config setter reference - my $config_set = sub { - my ($out, $what, $val) = @_; - return 0 unless $what && $val; - - # setup an error sub - my $err = sub { - $out->("ERROR: $_[0]") if $out; - return 0; - }; - - # if they said undef, that's not a regexp, that means use none - my $temp; - unless ($val eq 'undef' || $val eq 'none' || $val eq 'null') { - # verify this regexp works? 
do it in an eval because qr will die - # if we give it something invalid - eval { - $temp = qr{$val}; - }; - return $err->("Invalid regular expression") if $@ || !$temp; - } - - # see what they want to set and set it - if ($what =~ /^uri_pattern/i) { - $uri_check = $temp; - $svc->{extra_config}->{highpri_uri_check_str} = $val; - } elsif ($what =~ /^host_pattern/i) { - $host_check = $temp; - $svc->{extra_config}->{highpri_host_check_str} = $val; - } else { - return $err->("Plugin understands: uri_pattern, host_pattern"); - } - - # 1 for success! - return 1; - }; - - # register things to take in configuration regular expressions - $svc->register_setter('Highpri', 'uri_pattern', $config_set); - $svc->register_setter('Highpri', 'host_pattern', $config_set); - - # more complicated statistics - $svc->register_hook('Highpri', 'make_high_priority', sub { - my Perlbal::ClientProxy $cp = shift; - - # check it against our compiled regexp - return 1 if $uri_check && - $cp->{req_headers}->request_uri =~ /$uri_check/; - if ($host_check) { - my $hostname = $cp->{req_headers}->header('Host'); - return 1 if $hostname && $hostname =~ /$host_check/; - } - - # doesn't fit, so return 0 - return 0; - }); - - # mark this service as being active in this plugin - $Services{"$svc"} = $svc; - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - - # clean up time - $svc->unregister_hooks('Highpri'); - $svc->unregister_setters('Highpri'); - return 1; -} - -# load global commands for querying this plugin on what's up -sub load { - # setup a command to see what the patterns are - Perlbal::register_global_hook('manage_command.patterns', sub { - my @res = ("High priority pattern buffer:"); - - foreach my $svc (values %Services) { - push @res, "SET $svc->{name}.highpri.uri_pattern = $svc->{extra_config}->{highpri_uri_check_str}"; - push @res, "SET $svc->{name}.highpri.host_pattern = $svc->{extra_config}->{highpri_host_check_str}"; - } - - 
push @res, "."; - return \@res; - }); - - return 1; -} - -# unload our global commands, clear our service object -sub unload { - Perlbal::unregister_global_hook('manage_command.patterns'); - %Services = (); - return 1; -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/Include.pm b/lib/mogdeps/Perlbal/Plugin/Include.pm deleted file mode 100644 index 09499e26..00000000 --- a/lib/mogdeps/Perlbal/Plugin/Include.pm +++ /dev/null @@ -1,90 +0,0 @@ -=head1 NAME - -Perlbal::Plugin::Include - Allows multiple, nesting configuration files - -=head1 DESCRIPTION - -This module adds an INCLUDE command to the Perlbal management console -and allows the globbed inclusion of configuration files. - -=head1 SYNOPSIS - -This module provides a Perlbal plugin which can be loaded and used as -follows: - - LOAD include - INCLUDE = /etc/perlbal/my.conf - -You may also specify multiple configuration files a la File::Glob: - - INCLUDE = /foo/bar.conf /foo/quux/*.conf - -=head1 BUGS AND LIMITATIONS - -This module relies entirely on Perlbal::load_config for loading, so if -you have trouble with INCLUDE, be sure you can load the same -configuration without error using "perlbal -c" first. - -Also note that Perlbal::load_config versions 1.60 and below do not use -a local filehandle while reading the configuration file, so this -module overrides that routine on load to allow nested calls. - -=head1 COPYRIGHT AND LICENSE - -Copyright 2008 Eamon Daly - -This module is part of the Perlbal distribution, and as such can be -distributed under the same licence terms as the rest of Perlbal. 
- -=cut - -package Perlbal::Plugin::Include; - -use strict; -use warnings; -no warnings qw(deprecated); - -# called when we are loaded -sub load { - my $class = shift; - - Perlbal::register_global_hook('manage_command.include', sub { - my $mc = shift->parse(qr/^include\s+=\s+(.+)\s*$/, - "usage: INCLUDE = "); - - my ($glob) = $mc->args; - - for (glob($glob)) { - Perlbal::load_config($_, sub { print STDOUT "$_[0]\n"; }); - } - - return $mc->ok; - }); - - return 1; -} - -# called for a global unload -sub unload { - # unregister our global hooks - Perlbal::unregister_global_hook('manage_command.include'); - - return 1; -} - -# In older versions of Perlbal, load_config uses a typeglob, throwing -# warnings when re-entering. This uses a locally-scoped filehandle. -sub load_config_local { - my ($file, $writer) = @_; - open(my $fh, $file) or die "Error opening config file ($file): $!\n"; - my $ctx = Perlbal::CommandContext->new; - $ctx->verbose(0); - while (my $line = <$fh>) { - $line =~ s/\$(\w+)/$ENV{$1}/g; - return 0 unless Perlbal::run_manage_command($line, $writer, $ctx); - } - close($fh); - return 1; -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/LazyCDN.pm b/lib/mogdeps/Perlbal/Plugin/LazyCDN.pm deleted file mode 100644 index c6814cf4..00000000 --- a/lib/mogdeps/Perlbal/Plugin/LazyCDN.pm +++ /dev/null @@ -1,103 +0,0 @@ -package Perlbal::Plugin::LazyCDN; - -use IO::Socket::INET; -use Perlbal; -use Perlbal::ClientHTTPBase; -use strict; -use warnings; - -sub load { - # add up custom configuration options that people are allowed to set - Perlbal::Service::add_tunable( - # allow the following: - # SET myservice.fallback_service = proxy - fallback_service => { - des => "Service name to fall back to when a static get or concat get requests something newer than on disk.", - check_role => "web_server", - } - ); - - Perlbal::Service::add_tunable( - # allow the following: - # SET myservice.fallback_udp_ping_addr = 5 - fallback_udp_ping_addr => { - des => "Address and port 
to send UDP packets containing URL requests .", - check_role => "web_server", - check_type => ["regexp", qr/^\d+\.\d+\.\d+\.\d+:\d+$/, "Expecting IP:port of form a.b.c.d:port."], - } - ); - return 1; -} - -# remove the various things we've hooked into, this is required as a way of -# being good to the system... -sub unload { - Perlbal::Service::remove_tunable('fallback_service'); - Perlbal::Service::remove_tunable('fallback_udp_ping_addr'); - return 1; -} - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - - my $socket; - - my $hook = sub { - my Perlbal::ClientHTTPBase $client = shift; - my $last_modified = shift; # unix timestamp for last modified of the concatenated files - - my $fallback_service_name = $client->{service}->{extra_config}->{fallback_service}; - return unless $fallback_service_name; - - my $fallback_service = Perlbal->service($fallback_service_name); - return unless $fallback_service; - - my $req_hd = $client->{req_headers}; - - my $uri = $req_hd->request_uri; - - my ($v) = $uri =~ m/\bv=(\d+)\b/; - - if (defined $last_modified) { - return unless $v; - return 0 unless $v > $last_modified; - } - - if (my $fallback_ping_addr = $client->{service}->{extra_config}->{fallback_udp_ping_addr}) { - $socket ||= _ping_socket($fallback_ping_addr); - $socket->write($uri); - } - - $fallback_service->adopt_base_client( $client ); - - return 1; - }; - - $svc->register_hook('LazyCDN', 'static_get_poststat_pre_send', $hook); - $svc->register_hook('LazyCDN', 'concat_get_poststat_pre_send', $hook); - - $svc->register_hook('LazyCDN', 'static_get_poststat_file_missing', $hook); - $svc->register_hook('LazyCDN', 'concat_get_poststat_file_missing', $hook); - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - return 1; -} - -sub _ping_socket { - my $hostspec = shift; - my $socket = IO::Socket::INET->new( - PeerAddr => $hostspec, - Proto => 'udp', - Broadcast => 1, - 
ReuseAddr => 1) - or warn "Can't bind udp ping socket: $!\n"; - return $socket; -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/MaxContentLength.pm b/lib/mogdeps/Perlbal/Plugin/MaxContentLength.pm deleted file mode 100644 index 82c95ad5..00000000 --- a/lib/mogdeps/Perlbal/Plugin/MaxContentLength.pm +++ /dev/null @@ -1,91 +0,0 @@ -package Perlbal::Plugin::MaxContentLength; - -=head1 NAME - -Perlbal::Plugin::MaxContentLength - Reject large requests - -=head1 SYNOPSIS - - LOAD MaxContentLength - CREATE SERVICE cgilike - # define a service... - SET max_content_length = 100000 - SET plugins = MaxContentLength - ENABLE cgilike - -=head1 DESCRIPTION - -This module rejects requests that are larger than a configured limit. If a -request bears a Content-Length header whose value exceeds the -max_content_length value, the request will be rejected with a 413 "Request -Entity Too Large" error. - -=head1 AUTHOR - -Adam Thomason, Eathomason@sixapart.comE - -=head1 COPYRIGHT AND LICENSE - -Copyright 2008 Six Apart Ltd. - -This module is part of the Perlbal distribution, and as such can be distributed -under the same licence terms as the rest of Perlbal. - -=cut - -use strict; -use warnings; - -use Perlbal; - -sub load { - Perlbal::Service::add_tunable( - max_content_length => { - check_role => '*', - check_type => 'int', - des => "maximum Content-Length allowed, in bytes. 
0 for no limit", - default => 0, - }, - ); - return 1; -} - -use constant HANDLE_REQUEST => 0; -use constant IGNORE_REQUEST => 1; - -sub register { - my ($class, $svc) = @_; - - my $cfg = $svc->{extra_config}; - return unless $cfg; - - $svc->register_hook('MaxContentLength', 'start_http_request' => sub { - my $client = shift; - return IGNORE_REQUEST unless $client; - - # allow request if max is disabled - return HANDLE_REQUEST unless $cfg->{max_content_length}; - - my $headers = $client->{req_headers}; - return HANDLE_REQUEST unless $headers; - - # allow requests which don't have a Content-Length header - my $length = $headers->header('content-length'); - return HANDLE_REQUEST unless $length; - - # allow requests under the cap - return HANDLE_REQUEST if $length <= $cfg->{max_content_length}; - - $client->send_response(413, "Content too long.\n"); - return IGNORE_REQUEST; - }); -} - -sub unregister { - my ($class, $svc) = @_; - - $svc->unregister_hooks('MaxContentLength'); - return 1; -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/NotModified.pm b/lib/mogdeps/Perlbal/Plugin/NotModified.pm deleted file mode 100644 index d6d965fb..00000000 --- a/lib/mogdeps/Perlbal/Plugin/NotModified.pm +++ /dev/null @@ -1,84 +0,0 @@ -package Perlbal::Plugin::NotModified; - -use Perlbal; -use strict; -use warnings; - -# Takes settings in perlbal like: -# SET ss.notmodified.host_pattern = ^example\.com -# -# The value is a regular expression to match against the Host: header on the incoming request. 
- -sub load { - my $class = shift; - return 1; -} - -sub unload { - my $class = shift; - return 1; -} - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - - my $host_check_regex = undef; - - my $start_http_request_hook = sub { - my Perlbal::ClientHTTPBase $client = shift; - my Perlbal::HTTPHeaders $hds = $client->{req_headers}; - return 0 unless $hds; - - my $uri = $hds->request_uri; - - return 0 unless $uri; - - my $host = $hds->header("Host"); - - return 0 unless $host; - return 0 unless $host =~ $host_check_regex; - - my $ims = $hds->header("If-Modified-Since"); - - return 0 unless $ims; - - $client->send_response(304, "Not Modified"); - - return 1; - }; - - # register things to take in configuration regular expressions - $svc->register_setter('NotModified', 'host_pattern', sub { - my ($out, $what, $val) = @_; - return 0 unless $what && $val; - - my $err = sub { - $out->("ERROR: $_[0]") if $out; - return 0; - }; - - unless (length $val) { - $host_check_regex = undef; - $svc->unregister_hooks('NotModified'); - return 1; - } - - $host_check_regex = qr/$val/; - $svc->register_hook('NotModified', 'start_http_request', $start_http_request_hook); - - return 1; - }); - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - $svc->unregister_hooks('NotModified'); - $svc->unregister_setters('NotModified'); - return 1; -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/Palimg.pm b/lib/mogdeps/Perlbal/Plugin/Palimg.pm deleted file mode 100644 index 7e7abb1e..00000000 --- a/lib/mogdeps/Perlbal/Plugin/Palimg.pm +++ /dev/null @@ -1,368 +0,0 @@ -########################################################################### -# Palimg plugin that allows Perlbal to serve palette altered images -########################################################################### - -package Perlbal::Plugin::Palimg; - -use strict; -use warnings; -no warnings qw(deprecated); - -# called when we're 
being added to a service -sub register { - my ($class, $svc) = @_; - - # verify that an incoming request is a palimg request - $svc->register_hook('Palimg', 'start_serve_request', sub { - my Perlbal::ClientHTTPBase $obj = $_[0]; - return 0 unless $obj; - my Perlbal::HTTPHeaders $hd = $obj->{req_headers}; - my $uriref = $_[1]; - return 0 unless $uriref; - - # if this is palimg, peel off the requested modifications and put in headers - return 0 unless $$uriref =~ m!^/palimg/(.+)\.(\w+)(.*)$!; - my ($fn, $ext, $extra) = ($1, $2, $3); - return 0 unless $extra; - my ($palspec) = $extra =~ m!^/p(.+)$!; - return 0 unless $fn && $palspec; - - # must be ok, setup for it - $$uriref = "/palimg/$fn.$ext"; - $obj->{scratch}->{palimg} = [ $ext, $palspec ]; - return 0; - }); - - # actually serve a palimg - $svc->register_hook('Palimg', 'start_send_file', sub { - my Perlbal::ClientHTTPBase $obj = $_[0]; - return 0 unless $obj && - (my $palimginfo = $obj->{scratch}->{palimg}); - - # turn off writes - $obj->watch_write(0); - - # create filehandle for reading - my $data = ''; - Perlbal::AIO::aio_read($obj->reproxy_fh, 0, 2048, $data, sub { - # got data? 
undef is error - return $obj->_simple_response(500) unless $_[0] > 0; - - # pass down to handler - my Perlbal::HTTPHeaders $hd = $obj->{req_headers}; - my $res = PalImg::modify_file(\$data, $palimginfo->[0], $palimginfo->[1]); - return $obj->_simple_response(500) unless defined $res; - return $obj->_simple_response($res) if $res; - - # seek into the file now so sendfile starts further in - my $ld = length $data; - sysseek($obj->{reproxy_fh}, $ld, &POSIX::SEEK_SET); - $obj->{reproxy_file_offset} = $ld; - - # re-enable writes after we get data - $obj->tcp_cork(1); # by setting reproxy_file_offset above, it won't cork, so we cork it - $obj->write($data); - $obj->watch_write(1); - }); - - return 1; - }); - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - - # clean up time - $svc->unregister_hooks('Palimg'); - return 1; -} - -# called when we are loaded/unloaded ... someday add some stats viewing -# commands here? -sub load { return 1; } -sub unload { return 1; } - -####### PALIMG START ########################################################################### -package PalImg; - -sub parse_hex_color -{ - my $color = shift; - return [ map { hex(substr($color, $_, 2)) } (0,2,4) ]; -} - -sub modify_file -{ - my ($data, $type, $palspec) = @_; - - # palette altering - my %pal_colors; - if (my $pals = $palspec) { - my $hx = "[0-9a-f]"; - if ($pals =~ /^g($hx{2,2})($hx{6,6})($hx{2,2})($hx{6,6})$/) { - # gradient from index $1, color $2, to index $3, color $4 - my $from = hex($1); - my $to = hex($3); - return 404 if $from == $to; - my $fcolor = parse_hex_color($2); - my $tcolor = parse_hex_color($4); - if ($to < $from) { - ($from, $to, $fcolor, $tcolor) = - ($to, $from, $tcolor, $fcolor); - } - for (my $i=$from; $i<=$to; $i++) { - $pal_colors{$i} = [ map { - int($fcolor->[$_] + - ($tcolor->[$_] - $fcolor->[$_]) * - ($i-$from) / ($to-$from)) - } (0..2) ]; - } - } elsif ($pals =~ /^t($hx{6,6})($hx{6,6})?$/) { - # 
tint everything towards color - my ($t, $td) = ($1, $2); - $pal_colors{'tint'} = parse_hex_color($t); - $pal_colors{'tint_dark'} = $td ? parse_hex_color($td) : [0,0,0]; - } elsif (length($pals) > 42 || $pals =~ /[^0-9a-f]/) { - return 404; - } else { - my $len = length($pals); - return 404 if $len % 7; # must be multiple of 7 chars - for (my $i = 0; $i < $len/7; $i++) { - my $palindex = hex(substr($pals, $i*7, 1)); - $pal_colors{$palindex} = [ - hex(substr($pals, $i*7+1, 2)), - hex(substr($pals, $i*7+3, 2)), - hex(substr($pals, $i*7+5, 2)), - substr($pals, $i*7+1, 6), - ]; - } - } - } - - if (%pal_colors) { - if ($type eq 'gif') { - return 404 unless PaletteModify::new_gif_palette($data, \%pal_colors); - } elsif ($type eq 'png') { - return 404 unless PaletteModify::new_png_palette($data, \%pal_colors); - } - } - - # success - return 0; -} -####### PALIMG END ############################################################################# - -####### PALETTEMODIFY START #################################################################### -package PaletteModify; - -BEGIN { - $PaletteModify::HAVE_CRC = eval "use String::CRC32 (); 1;"; -} - -sub common_alter -{ - my ($palref, $table) = @_; - my $length = length $table; - - my $pal_size = $length / 3; - - # tinting image? 
if so, we're remaking the whole palette - if (my $tint = $palref->{'tint'}) { - my $dark = $palref->{'tint_dark'}; - my $diff = [ map { $tint->[$_] - $dark->[$_] } (0..2) ]; - $palref = {}; - for (my $idx=0; $idx<$pal_size; $idx++) { - for my $c (0..2) { - my $curr = ord(substr($table, $idx*3+$c)); - my $p = \$palref->{$idx}->[$c]; - $$p = int($dark->[$c] + $diff->[$c] * $curr / 255); - } - } - } - - while (my ($idx, $c) = each %$palref) { - next if $idx >= $pal_size; - substr($table, $idx*3+$_, 1) = chr($c->[$_]) for (0..2); - } - - return $table; -} - -sub new_gif_palette -{ - my ($data, $palref) = @_; - - # make sure we have data to operate on, or the substrs below die - return unless $$data; - - # 13 bytes for magic + image info (size, color depth, etc) - # and then the global palette table (3*256) - my $header = substr($$data, 0, 13+3*256); - - # figure out how big global color table is (don't want to overwrite it) - my $pf = ord substr($header, 10, 1); - my $gct = 2 ** (($pf & 7) + 1); # last 3 bits of packaged fields - - # final sanity check for size so the substr below doesn't die - return unless length $header >= 13 + 3 * $gct; - - substr($header, 13, 3*$gct) = common_alter($palref, substr($header, 13, 3*$gct)); - $$data = $header; - return 1; -} - -sub new_png_palette -{ - my ($data, $palref) = @_; - - # subroutine for reading data - my ($curidx, $maxlen) = (0, length $$data); - my $read = sub { - # put $_[1] data into scalar reference $_[0] - return undef if $_[1] + $curidx > $maxlen; - ${$_[0]} = substr($$data, $curidx, $_[1]); - $curidx += $_[1]; - return length ${$_[0]}; - }; - - # without this module, we can't proceed. 
- return 0 unless $PaletteModify::HAVE_CRC; - - my $imgdata; - - # Validate PNG signature - my $png_sig = pack("H16", "89504E470D0A1A0A"); - my $sig; - $read->(\$sig, 8); - return 0 unless $sig eq $png_sig; - $imgdata .= $sig; - - # Start reading in chunks - my ($length, $type) = (0, ''); - while ($read->(\$length, 4)) { - - $imgdata .= $length; - $length = unpack("N", $length); - return 0 unless $read->(\$type, 4) == 4; - $imgdata .= $type; - - if ($type eq 'IHDR') { - my $header; - $read->(\$header, $length+4); - my ($width,$height,$depth,$color,$compression, - $filter,$interlace, $CRC) - = unpack("NNCCCCCN", $header); - return 0 unless $color == 3; # unpaletted image - $imgdata .= $header; - } elsif ($type eq 'PLTE') { - # Finally, we can go to work - my $palettedata; - $read->(\$palettedata, $length); - $palettedata = common_alter($palref, $palettedata); - $imgdata .= $palettedata; - - # Skip old CRC - my $skip; - $read->(\$skip, 4); - - # Generate new CRC - my $crc = String::CRC32::crc32($type . $palettedata); - $crc = pack("N", $crc); - - $imgdata .= $crc; - $$data = $imgdata; - return 1; - } else { - my $skip; - # Skip rest of chunk and add to imgdata - # Number of bytes is +4 because of CRC - # - for (my $count=0; $count < $length + 4; $count++) { - $read->(\$skip, 1); - $imgdata .= $skip; - } - } - } - - return 0; -} -####### PALETTEMODIFY END ###################################################################### - -1; - -__END__ - -=head1 NAME - -Perlbal::Plugin::Palimg - plugin that allows Perlbal to serve palette altered images - -=head1 VERSION - -This documentation refers to C that ships with Perlbal 1.50 - -=head1 DESCRIPTION - -Palimg is a perlbal plugin that allows you to modify C and C on the fly. Put the images you want to be able to modify into the C directory. You modify them by adding C to the end of the url, where SPEC is one of the below defined commands (gradient, tint, etc). 
- -=head1 CONFIGURING PERLBAL - -To configure your Perlbal installation to use Palimg you'll need to C the plugin then add a service parameter to a C service to activate it. - -Example C: - - LOAD palimg - - CREATE SERVICE palex - SET listen = ${ip:eth0}:80 - SET role = web_server - SET plugins = palimg - SET docroot = /usr/share/doc/ - SET dirindexing = 0 - ENABLE palex - -=head1 GRADIENTS - -You can change the gradient of the image by adding C to the end of the url. C<00> is the index where the gradient starts and C<111111> is the color (in hex) of the beginning of the gradient. C<64> is the index of the end of the gradient and C is the color of the end of the gradient. Note that all colors specified in hex should be lowercase. - -Example: - - http://192.168.0.1/palimg/logo.gif/pg01aaaaaa99cccccc - -=head1 TINTING - -You can tint the image by adding C to the end of the url. C<000000> should be replaced with the color to tint towards. C is optional and defines the "dark" tint color. Both colors should be specified as lowercase hex numbers. - -Example: - - http://192.168.0.1/palimg/logo.gif/pt1c1c1c22dba1 - -=head1 PALETTE REPLACEMENT - -You can specify a palette to replace the palette of the image. Do this by adding up to six sets of seven hex lowercase numbers prefixed with C

to the end of the URL. - -Example: - - http://192.168.0.1/palimg/logo.gif/p01234567890abcfffffffcccccccddddddd - -=head1 BUGS AND LIMITATIONS - -There are no known bugs in this module. - -Please report problems to the Perlbal mailing list, http://groups.google.com/group/perlbal - -Patches are welcome. - -=head1 AUTHORS - -Brad Fitzpatrick -Mark Smith - -=head1 LICENSE AND COPYRIGHT - -Artistic/GPLv2, at your choosing. - -Copyright 2004, Danga Interactive -Copyright 2005-2007, Six Apart Ltd diff --git a/lib/mogdeps/Perlbal/Plugin/Queues.pm b/lib/mogdeps/Perlbal/Plugin/Queues.pm deleted file mode 100644 index 027110d3..00000000 --- a/lib/mogdeps/Perlbal/Plugin/Queues.pm +++ /dev/null @@ -1,55 +0,0 @@ -########################################################################### -# simple queue length header inclusion plugin -########################################################################### - -package Perlbal::Plugin::Queues; - -use strict; -use warnings; -no warnings qw(deprecated); - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - - # more complicated statistics - $svc->register_hook('Queues', 'backend_client_assigned', sub { - my Perlbal::BackendHTTP $obj = shift; - my Perlbal::HTTPHeaders $hds = $obj->{req_headers}; - my Perlbal::Service $svc = $obj->{service}; - return 0 unless defined $hds && defined $svc; - - # determine age of oldest (first in line) - my $now = time; - my Perlbal::ClientProxy $cp = $svc->{waiting_clients}->[0]; - my $age = defined $cp ? ($now - $cp->{last_request_time}) : 0; - - # now do the age of the high priority queue - $cp = $svc->{waiting_clients_highpri}->[0]; - my $hpage = defined $cp ? 
($now - $cp->{last_request_time}) : 0; - - # setup the queue length headers - $hds->header('X-Queue-Count', scalar(@{$svc->{waiting_clients}})); - $hds->header('X-Queue-Age', $age); - $hds->header('X-HP-Queue-Count', scalar(@{$svc->{waiting_clients_highpri}})); - $hds->header('X-HP-Queue-Age', $hpage); - return 0; - }); - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - - # clean up time - $svc->unregister_hooks('Queues'); - return 1; -} - -# we don't do anything in here -sub load { return 1; } -sub unload { return 1; } - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/Redirect.pm b/lib/mogdeps/Perlbal/Plugin/Redirect.pm deleted file mode 100644 index 19cdb1d3..00000000 --- a/lib/mogdeps/Perlbal/Plugin/Redirect.pm +++ /dev/null @@ -1,130 +0,0 @@ -package Perlbal::Plugin::Redirect; -use strict; -use warnings; - -sub handle_request { - my ($svc, $pb) = @_; - - my $mappings = $svc->{extra_config}{_redirect_host}; - my $req_header = $pb->{req_headers}; - - # returns 1 if done with client, 0 if no action taken - my $map_using = sub { - my ($match_on) = @_; - - my $target_host = $mappings->{$match_on}; - - return 0 unless $target_host; - - my $path = $req_header->request_uri; - - my $res_header = Perlbal::HTTPHeaders->new_response(301); - $res_header->header('Location' => "http://$target_host$path"); - $res_header->header('Content-Length' => 0); - # For some reason a follow-up request gets a "400 Bad request" response, - # so until someone has time to figure out why, just punt and disable - # keep-alives after this request. - $res_header->header('Connection' => 'close'); - $pb->write($res_header->to_string_ref()); - - return 1; - }; - - # The following is lifted wholesale from the vhosts plugin. - # FIXME: Factor it out to a utility function, I guess? 
- # - # foo.site.com should match: - # foo.site.com - # *.foo.site.com - # *.site.com - # *.com - # * - - my $vhost = lc($req_header->header("Host")); - - # if no vhost, just try the * mapping - return $map_using->("*") unless $vhost; - - # Strip off the :portnumber, if any - $vhost =~ s/:\d+$//; - - # try the literal mapping - return 1 if $map_using->($vhost); - - # and now try wildcard mappings, removing one part of the domain - # at a time until we find something, or end up at "*" - - # first wildcard, prepending the "*." - my $wild = "*.$vhost"; - return 1 if $map_using->($wild); - - # now peel away subdomains - while ($wild =~ s/^\*\.[\w\-\_]+/*/) { - return 1 if $map_using->($wild); - } - - # last option: use the "*" wildcard - return $map_using->("*"); -} - -sub register { - my ($class, $svc) = @_; - - $svc->register_hook('Redirect', 'start_http_request', sub { handle_request($svc, $_[0]); }); -} - -sub unregister { - my ($class, $svc) = @_; - $svc->unregister_hooks('Redirect'); -} - -sub handle_redirect_command { - my $mc = shift->parse(qr/^redirect\s+host\s+(\S+)\s+(\S+)$/, "usage: REDIRECT HOST "); - my ($match_host, $target_host) = $mc->args; - - my $svcname; - unless ($svcname ||= $mc->{ctx}{last_created}) { - return $mc->err("No service name in context from CREATE SERVICE or USE "); - } - - my $svc = Perlbal->service($svcname); - return $mc->err("Non-existent service '$svcname'") unless $svc; - - $svc->{extra_config}{_redirect_host} ||= {}; - $svc->{extra_config}{_redirect_host}{lc($match_host)} = lc($target_host); - - return 1; -} - -# called when we are loaded -sub load { - Perlbal::register_global_hook('manage_command.redirect', \&handle_redirect_command); - - return 1; -} - -# called for a global unload -sub unload { - return 1; -} - -1; - -=head1 NAME - -Perlbal::Plugin::Redirect - Plugin to do redirecting in Perlbal land - -=head1 SYNOPSIS - - LOAD redirect - - CREATE SERVICE redirector - SET role = web_server - SET plugins = redirect - REDIRECT 
HOST example.com www.example.net - ENABLE redirector - -=head1 LIMITATIONS - -Right now this can only redirect at the hostname level. Also, it just -assumes you want an http: URL. diff --git a/lib/mogdeps/Perlbal/Plugin/Stats.pm b/lib/mogdeps/Perlbal/Plugin/Stats.pm deleted file mode 100644 index e28b8866..00000000 --- a/lib/mogdeps/Perlbal/Plugin/Stats.pm +++ /dev/null @@ -1,167 +0,0 @@ -########################################################################### -# basic Perlbal statistics gatherer -########################################################################### - -package Perlbal::Plugin::Stats; - -use strict; -use warnings; -no warnings qw(deprecated); - -use Time::HiRes qw(gettimeofday tv_interval); - -# setup our package variables -our %statobjs; # { svc_name => [ service, statobj ], svc_name => [ service, statobj ], ... } - -# define all stats keys here -our @statkeys = qw( files_sent files_reproxied - web_requests proxy_requests - proxy_requests_highpri ); - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - - # create a stats object - my $sobj = Perlbal::Plugin::Stats::Storage->new(); - $statobjs{$svc->{name}} = [ $svc, $sobj ]; - - # simple events we count are done here. when the hook on the left side is called, - # we simply increment the count of the stat on the right side. - my %simple = qw( - start_send_file files_sent - start_file_reproxy files_reproxied - start_web_request web_requests - ); - - # create hooks for %simple things - while (my ($hook, $stat) = each %simple) { - eval "\$svc->register_hook('Stats', '$hook', sub { \$sobj->{'$stat'}++; return 0; });"; - return undef if $@; - } - - # more complicated statistics - $svc->register_hook('Stats', 'backend_client_assigned', sub { - my Perlbal::BackendHTTP $be = shift; - my Perlbal::ClientProxy $cp = $be->{client}; - $sobj->{pending}->{"$cp"} = [ gettimeofday() ]; - ($cp->{high_priority} ? 
$sobj->{proxy_requests_highpri} : $sobj->{proxy_requests})++; - return 0; - }); - $svc->register_hook('Stats', 'backend_response_received', sub { - my Perlbal::BackendHTTP $be = shift; - my Perlbal::ClientProxy $obj = $be->{client}; - my $ot = delete $sobj->{pending}->{"$obj"}; - return 0 unless defined $ot; - - # now construct data to put in recent - if (defined $obj->{req_headers}) { - my $uri = 'http://' . ($obj->{req_headers}->header('Host') || 'unknown') . $obj->{req_headers}->request_uri; - push @{$sobj->{recent}}, sprintf('%-6.4f %s', tv_interval($ot), $uri); - shift(@{$sobj->{recent}}) if scalar(@{$sobj->{recent}}) > 100; # if > 100 items, lose one - } - return 0; - }); - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - - # clean up time - $svc->unregister_hooks('Stats'); - delete $statobjs{$svc->{name}}; - return 1; -} - -# called when we are loaded -sub load { - # setup a management command to dump statistics - Perlbal::register_global_hook("manage_command.stats", sub { - my @res; - - # create temporary object for stats storage - my $gsobj = Perlbal::Plugin::Stats::Storage->new(); - - # dump per service - foreach my $svc (keys %statobjs) { - my $sobj = $statobjs{$svc}->[1]; - - # for now, simply dump the numbers we have - foreach my $key (sort @statkeys) { - push @res, sprintf("%-15s %-25s %12d", $svc, $key, $sobj->{$key}); - $gsobj->{$key} += $sobj->{$key}; - } - } - - # global stats - foreach my $key (sort @statkeys) { - push @res, sprintf("%-15s %-25s %12d", 'total', $key, $gsobj->{$key}); - } - - push @res, "."; - return \@res; - }); - - # recent requests and how long they took - Perlbal::register_global_hook("manage_command.recent", sub { - my @res; - foreach my $svc (keys %statobjs) { - my $sobj = $statobjs{$svc}->[1]; - push @res, "$svc $_" - foreach @{$sobj->{recent}}; - } - - push @res, "."; - return \@res; - }); - - return 1; -} - -# called for a global unload -sub unload { - # 
unregister our global hooks - Perlbal::unregister_global_hook('manage_command.stats'); - Perlbal::unregister_global_hook('manage_command.recent'); - - # take out all service stuff - foreach my $statref (values %statobjs) { - $statref->[0]->unregister_hooks('Stats'); - } - %statobjs = (); - - return 1; -} - -# statistics storage object -package Perlbal::Plugin::Stats::Storage; - -use fields ( - 'files_sent', # files sent from disk (includes reproxies and regular web requests) - 'files_reproxied', # files we've sent via reproxying (told to by backend) - 'web_requests', # requests we sent ourselves (no reproxy, no backend) - 'proxy_requests', # regular requests that went to a backend to be served - 'proxy_requests_highpri', # same as above, except high priority - - 'pending', # hashref; { "obj" => time_start } - 'recent', # arrayref; strings of recent URIs and times - ); - -sub new { - my Perlbal::Plugin::Stats::Storage $self = shift; - $self = fields::new($self) unless ref $self; - - # 0 initialize everything here - $self->{$_} = 0 foreach @Perlbal::Plugin::Stats::statkeys; - - # other setup - $self->{pending} = {}; - $self->{recent} = []; - - return $self; -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/Vhosts.pm b/lib/mogdeps/Perlbal/Plugin/Vhosts.pm deleted file mode 100644 index d1a36823..00000000 --- a/lib/mogdeps/Perlbal/Plugin/Vhosts.pm +++ /dev/null @@ -1,179 +0,0 @@ -########################################################################### -# plugin to do name-based virtual hosts -########################################################################### - -# things to test: -# one persistent connection, first to a docs plugin, then to web proxy... 
see if it returns us to our base class after end of request -# PUTing a large file to a selector, seeing if it is put correctly to the PUT-enabled web_server proxy -# obvious cases: non-existent domains, default domains (*), proper matching (foo.brad.lj before *.brad.lj) -# - -package Perlbal::Plugin::Vhosts; - -use strict; -use warnings; -no warnings qw(deprecated); - -our %Services; # service_name => $svc - -# when "LOAD" directive loads us up -sub load { - my $class = shift; - - Perlbal::register_global_hook('manage_command.vhost', sub { - my $mc = shift->parse(qr/^vhost\s+(?:(\w+)\s+)?(\S+)\s*=\s*(\w+)$/, - "usage: VHOST [] = "); - my ($selname, $host, $target) = $mc->args; - unless ($selname ||= $mc->{ctx}{last_created}) { - return $mc->err("omitted service name not implied from context"); - } - - my $ss = Perlbal->service($selname); - return $mc->err("Service '$selname' is not a selector service") - unless $ss && $ss->{role} eq "selector"; - - $host = lc $host; - return $mc->err("invalid host pattern: '$host'") - unless $host =~ /^[\w\-\_\.\*\;\:]+$/; - - $ss->{extra_config}->{_vhosts} ||= {}; - $ss->{extra_config}->{_vhosts}{$host} = $target; - - return $mc->ok; - }); - return 1; -} - -# unload our global commands, clear our service object -sub unload { - my $class = shift; - - Perlbal::unregister_global_hook('manage_command.vhost'); - unregister($class, $_) foreach (values %Services); - return 1; -} - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - unless ($svc && $svc->{role} eq "selector") { - die "You can't load the vhost plugin on a service not of role selector.\n"; - } - - $svc->selector(\&vhost_selector); - $svc->{extra_config}->{_vhosts} = {}; - - $Services{"$svc"} = $svc; - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - $svc->selector(undef); - delete $Services{"$svc"}; - return 1; -} - -sub dumpconfig { - my ($class, $svc) = @_; - - my $vhosts 
= $svc->{extra_config}->{_vhosts}; - - return unless $vhosts; - - my @return; - - while (my ($vhost, $target) = each %$vhosts) { - push @return, "VHOST $vhost = $target"; - } - - return @return; -} - -# call back from Service via ClientHTTPBase's event_read calling service->select_new_service(Perlbal::ClientHTTPBase) -sub vhost_selector { - my Perlbal::ClientHTTPBase $cb = shift; - - my $req = $cb->{req_headers}; - return $cb->_simple_response(404, "Not Found (no reqheaders)") unless $req; - - my $vhost = $req->header("Host"); - - # Browsers and the Apache API considers 'www.example.com.' == 'www.example.com' - $vhost and $vhost =~ s/\.$//; - - my $uri = $req->request_uri; - my $maps = $cb->{service}{extra_config}{_vhosts} ||= {}; - - # ability to ask for one host, but actually use another. (for - # circumventing javascript/java/browser host restrictions when you - # actually control two domains). - if ($vhost && $uri =~ m!^/__using/([\w\.]+)(?:/\w+)(?:\?.*)?$!) { - my $alt_host = $1; - - # update our request object's Host header, if we ended up switching them - # around with /__using/... - my $svc_name = $maps->{"$vhost;using:$alt_host"}; - my $svc = $svc_name ? Perlbal->service($svc_name) : undef; - unless ($svc) { - $cb->_simple_response(404, "Vhost twiddling not configured for requested pair."); - return 1; - } - - $req->header("Host", $alt_host); - $svc->adopt_base_client($cb); - return 1; - } - - # returns 1 if done with client, 0 if no action taken - my $map_using = sub { - my ($match_on, $force) = @_; - - my $map_name = $maps->{$match_on}; - my $svc = $map_name ? Perlbal->service($map_name) : undef; - - return 0 unless $svc || $force; - - unless ($svc) { - $cb->_simple_response(404, "Not Found (no configured vhost)"); - return 1; - } - - $svc->adopt_base_client($cb); - return 1; - }; - - # foo.site.com should match: - # foo.site.com - # *.foo.site.com -- this one's questionable, but might as well? 
- # *.site.com - # *.com - # * - - # if no vhost, just try the * mapping - return $map_using->("*", 1) unless $vhost; - - # Strip off the :portnumber, if any - $vhost =~ s/:\d+$//; - - # try the literal mapping - return if $map_using->($vhost); - - # and now try wildcard mappings, removing one part of the domain - # at a time until we find something, or end up at "*" - - # first wildcard, prepending the "*." - my $wild = "*.$vhost"; - return if $map_using->($wild); - - # now peel away subdomains - while ($wild =~ s/^\*\.[\w\-\_]+/*/) { - return if $map_using->($wild); - } - - # last option: use the "*" wildcard - return $map_using->("*", 1); -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/Vpaths.pm b/lib/mogdeps/Perlbal/Plugin/Vpaths.pm deleted file mode 100644 index 4108597d..00000000 --- a/lib/mogdeps/Perlbal/Plugin/Vpaths.pm +++ /dev/null @@ -1,105 +0,0 @@ -########################################################################### -# plugin to use with selectors to select by path -# -# this will not play well with the Vhosts plugin or any other selector -# behavior plugins. -# -# this has also not been optimized for huge volume sites. 
-########################################################################### - -package Perlbal::Plugin::Vpaths; - -use strict; -use warnings; -no warnings qw(deprecated); - -our %Services; # service_name => $svc - -# when "LOAD" directive loads us up -sub load { - my $class = shift; - - Perlbal::register_global_hook('manage_command.vpath', sub { - my $mc = shift->parse(qr/^vpath\s+(?:(\w+)\s+)?(\S+)\s*=\s*(\w+)$/, - "usage: VPATH [] = "); - my ($selname, $regex, $target) = $mc->args; - unless ($selname ||= $mc->{ctx}{last_created}) { - return $mc->err("omitted service name not implied from context"); - } - - my $ss = Perlbal->service($selname); - return $mc->err("Service '$selname' is not a selector service") - unless $ss && $ss->{role} eq "selector"; - - my $cregex = qr/$regex/; - return $mc->err("invalid regular expression: '$regex'") - unless $cregex; - - $ss->{extra_config}->{_vpaths} ||= []; - push @{$ss->{extra_config}->{_vpaths}}, [ $cregex, $target ]; - - return $mc->ok; - }); - return 1; -} - -# unload our global commands, clear our service object -sub unload { - my $class = shift; - - Perlbal::unregister_global_hook('manage_command.vpath'); - unregister($class, $_) foreach (values %Services); - return 1; -} - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - unless ($svc && $svc->{role} eq "selector") { - die "You can't load the vpath plugin on a service not of role selector.\n"; - } - - $svc->selector(\&vpath_selector); - $svc->{extra_config}->{_vpaths} = []; - - $Services{"$svc"} = $svc; - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - $svc->selector(undef); - delete $Services{"$svc"}; - return 1; -} - -# call back from Service via ClientHTTPBase's event_read calling service->select_new_service(Perlbal::ClientHTTPBase) -sub vpath_selector { - my Perlbal::ClientHTTPBase $cb = shift; - - my $req = $cb->{req_headers}; - return 
$cb->_simple_response(404, "Not Found (no reqheaders)") unless $req; - - my $uri = $req->request_uri; - my $maps = $cb->{service}{extra_config}{_vpaths} ||= {}; - - # iterate down the list of paths, find any matches - foreach my $row (@$maps) { - next unless $uri =~ /$row->[0]/; - - my $svc_name = $row->[1]; - my $svc = $svc_name ? Perlbal->service($svc_name) : undef; - unless ($svc) { - $cb->_simple_response(404, "Not Found ($svc_name not a defined service)"); - return 1; - } - - $svc->adopt_base_client($cb); - return 1; - } - - return 0; -} - -1; diff --git a/lib/mogdeps/Perlbal/Pool.pm b/lib/mogdeps/Perlbal/Pool.pm deleted file mode 100644 index a6d88a1e..00000000 --- a/lib/mogdeps/Perlbal/Pool.pm +++ /dev/null @@ -1,301 +0,0 @@ -###################################################################### -# Pool class -###################################################################### -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. -# - -package Perlbal::Pool; -use strict; -use warnings; - -use Perlbal::BackendHTTP; - -# how often to reload the nodefile -use constant NODEFILE_RELOAD_FREQ => 3; - -# balance methods we support (note: sendstats mode is now removed) -use constant BM_ROUNDROBIN => 2; -use constant BM_RANDOM => 3; - -use fields ( - 'name', # string; name of this pool - 'use_count', # int; number of services using us - 'nodes', # arrayref; [ip, port] values (port defaults to 80) - 'node_count', # int; number of nodes - 'node_used', # hashref; { ip:port => use count } - 'balance_method', # int; BM_ constant from above - - # used in nodefile mode - 'nodefile', # string; filename to read nodes from - 'nodefile.lastmod', # unix time nodefile was last modified - 'nodefile.lastcheck', # unix time nodefile was last stated - 'nodefile.checking', # boolean; if true AIO is stating the file for us - ); - -sub new { - my Perlbal::Pool $self = shift; - $self = fields::new($self) unless ref $self; - - my ($name) = @_; - - 
$self->{name} = $name; - $self->{use_count} = 0; - - $self->{nodes} = []; - $self->{node_count} = 0; - $self->{node_used} = {}; - - $self->{nodefile} = undef; - $self->{balance_method} = BM_RANDOM; - - return $self; -} - -sub set { - my Perlbal::Pool $self = shift; - - my ($key, $val, $mc) = @_; - my $set = sub { $self->{$key} = $val; return $mc->ok; }; - - if ($key eq 'nodefile') { - # allow to unset it, which stops us from checking it further, - # but doesn't clear our current list of nodes - if ($val =~ /^(?:none|undef|null|""|'')$/) { - $self->{'nodefile'} = undef; - $self->{'nodefile.lastmod'} = 0; - $self->{'nodefile.checking'} = 0; - $self->{'nodefile.lastcheck'} = 0; - return $mc->ok; - } - - # enforce that it exists from here on out - return $mc->err("File not found") - unless -e $val; - - # force a reload - $self->{'nodefile'} = $val; - $self->{'nodefile.lastmod'} = 0; - $self->{'nodefile.checking'} = 0; - $self->load_nodefile; - $self->{'nodefile.lastcheck'} = time; - return $mc->ok; - } - - if ($key eq "balance_method") { - $val = { - 'random' => BM_RANDOM, - }->{$val}; - return $mc->err("Unknown balance method") - unless $val; - return $set->(); - } - -} - -sub dumpconfig { - my Perlbal::Pool $self = shift; - my $name = $self->{name}; - - my @return; - - if (my $nodefile = $self->{'nodefile'}) { - push @return, "SET nodefile = $nodefile"; - } else { - foreach my $node (@{$self->{nodes}}) { - my ($ip, $port) = @$node; - push @return, "POOL ADD $name $ip:$port"; - } - } - return @return; -} - -# returns string of balance method -sub balance_method { - my Perlbal::Pool $self = $_[0]; - my $methods = { - &BM_ROUNDROBIN => "round_robin", - &BM_RANDOM => "random", - }; - return $methods->{$self->{balance_method}} || $self->{balance_method}; -} - -sub load_nodefile { - my Perlbal::Pool $self = shift; - return 0 unless $self->{'nodefile'}; - - if ($Perlbal::OPTMOD_LINUX_AIO) { - return $self->_load_nodefile_async; - } else { - return 
$self->_load_nodefile_sync; - } -} - -sub _parse_nodefile { - my Perlbal::Pool $self = shift; - my $dataref = shift; - - my @nodes = split(/\r?\n/, $$dataref); - - # prepare for adding nodes - $self->{nodes} = []; - $self->{node_used} = {}; - - foreach (@nodes) { - s/\#.*//; - if (/(\d+\.\d+\.\d+\.\d+)(?::(\d+))?/) { - my ($ip, $port) = ($1, $2); - $port ||= 80; - $self->{node_used}->{"$ip:$port"} ||= 0; # set to 0 if not set - push @{$self->{nodes}}, [ $ip, $port ]; - } - } - - # setup things using new data - $self->{node_count} = scalar @{$self->{nodes}}; -} - -sub _load_nodefile_sync { - my Perlbal::Pool $self = shift; - - my $mod = (stat($self->{nodefile}))[9]; - return if $mod == $self->{'nodefile.lastmod'}; - $self->{'nodefile.lastmod'} = $mod; - - open NODEFILE, $self->{nodefile} or return; - my $nodes; - { local $/ = undef; $nodes = ; } - close NODEFILE; - $self->_parse_nodefile(\$nodes); -} - -sub _load_nodefile_async { - my Perlbal::Pool $self = shift; - - return if $self->{'nodefile.checking'}; - $self->{'nodefile.checking'} = 1; - - Perlbal::AIO::aio_stat($self->{nodefile}, sub { - $self->{'nodefile.checking'} = 0; - - # this might have gotten unset while we were out statting the file, which - # means that the user has instructed us not to use a node file, and may - # have changed the nodes in the pool, so we should do nothing and return - return unless $self->{'nodefile'}; - - # ignore if the file doesn't exist - return unless -e _; - - my $mod = (stat(_))[9]; - return if $mod == $self->{'nodefile.lastmod'}; - $self->{'nodefile.lastmod'} = $mod; - - # construct a filehandle (we only have a descriptor here) - open NODEFILE, $self->{nodefile} - or return; - my $nodes; - { local $/ = undef; $nodes = ; } - close NODEFILE; - - $self->_parse_nodefile(\$nodes); - return; - }); - - return 1; -} - -sub add { - my Perlbal::Pool $self = shift; - my ($ip, $port) = @_; - - $self->remove($ip, $port); # no dupes - - $self->{node_used}->{"$ip:$port"} = 0; - push 
@{$self->{nodes}}, [ $ip, $port ]; - $self->{node_count} = scalar(@{$self->{nodes}}); -} - -sub remove { - my Perlbal::Pool $self = shift; - my ($ip, $port) = @_; - - delete $self->{node_used}->{"$ip:$port"}; - @{$self->{nodes}} = grep { "$_->[0]:$_->[1]" ne "$ip:$port" } @{$self->{nodes}}; - $self->{node_count} = scalar(@{$self->{nodes}}); -} - -sub get_backend_endpoint { - my Perlbal::Pool $self = $_[0]; - - my @endpoint; # (IP,port) - - # re-load nodefile if necessary - if ($self->{nodefile}) { - my $now = time; - if ($now > $self->{'nodefile.lastcheck'} + NODEFILE_RELOAD_FREQ) { - $self->{'nodefile.lastcheck'} = $now; - $self->load_nodefile; - } - } - - # no nodes? - return () unless $self->{node_count}; - - # pick one randomly - return @{$self->{nodes}[int(rand($self->{node_count}))]}; -} - -sub backend_should_live { - my Perlbal::Pool $self = $_[0]; - my Perlbal::BackendHTTP $be = $_[1]; - - # a backend stays alive if we still have users. eventually this whole - # function might do more and actually take into account the individual - # backend, but for now, this suits us. 
- return 1 if $self->{use_count}; - return 0; -} - -sub node_count { - my Perlbal::Pool $self = $_[0]; - return $self->{node_count}; -} - -sub nodes { - my Perlbal::Pool $self = $_[0]; - return $self->{nodes}; -} - -sub node_used { - my Perlbal::Pool $self = $_[0]; - return $self->{node_used}->{$_[1]}; -} - -sub mark_node_used { - my Perlbal::Pool $self = $_[0]; - $self->{node_used}->{$_[1]}++; -} - -sub increment_use_count { - my Perlbal::Pool $self = $_[0]; - $self->{use_count}++; -} - -sub decrement_use_count { - my Perlbal::Pool $self = $_[0]; - $self->{use_count}--; -} - -sub name { - my Perlbal::Pool $self = $_[0]; - return $self->{name}; -} - -1; - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/ReproxyManager.pm b/lib/mogdeps/Perlbal/ReproxyManager.pm deleted file mode 100644 index 58c45aab..00000000 --- a/lib/mogdeps/Perlbal/ReproxyManager.pm +++ /dev/null @@ -1,255 +0,0 @@ -# HTTP connection to non-pool backend nodes (probably fast event-based webservers) -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. -# - -package Perlbal::ReproxyManager; -use strict; -use warnings; -no warnings qw(deprecated); - -# class storage to store 'host:ip' => $service objects, for making -# reproxies use a service that you can then track -our $ReproxySelf; -our %ReproxyConnecting; # ( host:ip => $backend ); keeps track of outstanding connections to backend that - # are in the connecting state -our %ReproxyBored; # ( host:ip => [ $backend, ... ] ); list of our bored backends -our %ReproxyQueues; # ( host:ip => [ $clientproxy, ... ] ); queued up requests for this backend -our %ReproxyBackends; # ( host:ip => [ $backend, ... 
] ); array of backends we have connected -our %ReproxyMax; # ( host:ip => int ); maximum number of connections to have open at any one time -our $ReproxyGlobalMax; # int; the global cap used if no per-host cap is specified -our $NoSpawn = 0; # bool; when set, spawn_backend immediately returns without running -our $LastCleanup = 0; # int; time we last ran our cleanup logic (FIXME: temp hack) - -Perlbal::track_var("rep_connecting", \%ReproxyConnecting); -Perlbal::track_var("rep_bored", \%ReproxyBored); -Perlbal::track_var("rep_queues", \%ReproxyQueues); -Perlbal::track_var("rep_backends", \%ReproxyBackends); - -# singleton new function; returns us if we exist, else creates us -sub get { - return $ReproxySelf if $ReproxySelf; - - # doesn't exist, so create it and return it - my $class = shift; - my $self = {}; - bless $self, $class; - return $ReproxySelf = $self; -} - -# given (clientproxy, primary_res_hdrs), initiate proceedings to process a -# request for a reproxy resource -sub do_reproxy { - my Perlbal::ReproxyManager $self = Perlbal::ReproxyManager->get; # singleton - my Perlbal::ClientProxy $cp = $_[0]; - return undef unless $self && $cp; - - # get data we use - my $datref = $cp->{reproxy_uris}->[0]; - my $ipport = "$datref->[0]:$datref->[1]"; - push @{$ReproxyQueues{$ipport} ||= []}, $cp; - - # see if we should do cleanup (FIXME: temp hack) - my $now = time(); - if ($LastCleanup < $now - 5) { - # remove closed backends from our array. this is O(n) but n is small - # and we're paranoid that just keeping a count would get corrupt over - # time. also removes the backends that have clients that are closed. - @{$ReproxyBackends{$ipport}} = grep { - ! $_->{closed} && (! $_->{client} || ! 
$_->{client}->{closed}) - } @{$ReproxyBackends{$ipport}}; - - $LastCleanup = $now; - } - - # now start a new backend - $self->spawn_backend($ipport); - return 1; -} - -# part of the reportto interface; this is called when a backend is unable to establish -# a connection with a backend. we simply try the next uri. -sub note_bad_backend_connect { - my Perlbal::ReproxyManager $self = $_[0]; - my Perlbal::BackendHTTP $be = $_[1]; - - # decrement counts and undef connecting backend - $ReproxyConnecting{$be->{ipport}} = undef; - - # if nobody waiting, doesn't matter if we couldn't get to this backend - return unless @{$ReproxyQueues{$be->{ipport}} || []}; - - # if we still have some connected backends then ignore this bad connection attempt - return if scalar @{$ReproxyBackends{$be->{ipport}} || []}; - - # at this point, we have no connected backends, and our connecting one failed - # so we want to tell all of the waiting clients to try their next uri, because - # this host is down. - while (my Perlbal::ClientProxy $cp = shift @{$ReproxyQueues{$be->{ipport}}}) { - $cp->try_next_uri; - } - return 1; -} - -# called by a backend when it's ready for a request -sub register_boredom { - my Perlbal::ReproxyManager $self = $_[0]; - my Perlbal::BackendHTTP $be = $_[1]; - - # if this backend was connecting - my $ipport = $be->{ipport}; - if ($ReproxyConnecting{$ipport} && $ReproxyConnecting{$ipport} == $be) { - $ReproxyConnecting{$ipport} = undef; - $ReproxyBackends{$ipport} ||= []; - push @{$ReproxyBackends{$ipport}}, $be; - } - - # sometimes a backend is closed but it tries to register with us anyway... 
ignore it - # but since this might have been our only one, spawn another - if ($be->{closed}) { - $self->spawn_backend($ipport); - return; - } - - # find some clients to use - while (my Perlbal::ClientProxy $cp = shift @{$ReproxyQueues{$ipport} || []}) { - # safety checks - next if $cp->{closed}; - - # give backend to client - $cp->use_reproxy_backend($be); - return; - } - - # no clients if we get here, so push onto bored backend list - push @{$ReproxyBored{$ipport} ||= []}, $be; - - # clean up the front of our list if we can (see docs above) - if (my Perlbal::BackendHTTP $bbe = $ReproxyBored{$ipport}->[0]) { - if ($bbe->{alive_time} < time() - 5) { - $NoSpawn = 1; - $bbe->close('have_newer_bored'); - shift @{$ReproxyBored{$ipport}}; - $NoSpawn = 0; - } - } - return 0; -} - -# backend closed, decrease counts, etc -sub note_backend_close { - my Perlbal::ReproxyManager $self = $_[0]; - my Perlbal::BackendHTTP $be = $_[1]; - - # remove closed backends from our array. this is O(n) but n is small - # and we're paranoid that just keeping a count would get corrupt over - # time. - @{$ReproxyBackends{$be->{ipport}}} = grep { - ! $_->{closed} - } @{$ReproxyBackends{$be->{ipport}}}; - - # spawn more if needed - $self->spawn_backend($be->{ipport}); -} - -sub spawn_backend { - return if $NoSpawn; - - my Perlbal::ReproxyManager $self = $_[0]; - my $ipport = $_[1]; - - # if we're already connecting, we don't want to spawn another one - if (my Perlbal::BackendHTTP $be = $ReproxyConnecting{$ipport}) { - # see if this one is too old? - if ($be->{create_time} < (time() - 5)) { # older than 5 seconds? 
- $self->note_bad_backend_connect($be); - $be->close("connection_timeout"); - - # we return here instead of spawning because closing the backend calls - # note_backend_close which will call spawn_backend again, and at that - # point we won't have a pending connection and can spawn - return; - } else { - # don't spawn more if we're already connecting - return; - } - } - - # if nobody waiting, don't spawn extra connections - return unless @{$ReproxyQueues{$ipport} || []}; - - # don't spawn if we have a bored one already - while (my Perlbal::BackendHTTP $bbe = pop @{$ReproxyBored{$ipport} || []}) { - - # don't use keep-alive connections if we know the server's - # just about to kill the connection for being idle - my $now = time(); - if ($bbe->{disconnect_at} && $now + 2 > $bbe->{disconnect_at} || - $bbe->{alive_time} < $now - 5) - { - $NoSpawn = 1; - $bbe->close("too_close_disconnect"); - $NoSpawn = 0; - next; - } - - # it's good, give it to someone - $self->register_boredom($bbe); - return; - } - - # see if we have too many already? 
- my $max = $ReproxyMax{$ipport} || $ReproxyGlobalMax || 0; - my $count = scalar @{$ReproxyBackends{$ipport} || []}; - return if $max && ($count >= $max); - - # start one connecting and enqueue - my $be = Perlbal::BackendHTTP->new(undef, split(/:/, $ipport), { reportto => $self }) - or return 0; - $ReproxyConnecting{$ipport} = $be; -} - -sub backend_response_received { - my Perlbal::ReproxyManager $self = $_[0]; - my Perlbal::BackendHTTP $be = $_[1]; - my Perlbal::ClientProxy $cp = $be->{client}; - - # if no client, close backend and return 1 - unless ($cp) { - $be->close("lost_client"); - return 1; - } - - # pass on to client - return $cp->backend_response_received($be); -} - -sub dump_state { - my $out = shift; - return unless $out; - - # spits out what we have connecting - while (my ($hostip, $dat) = each %ReproxyConnecting) { - $out->("connecting $hostip 1") if defined $dat; - } - while (my ($hostip, $dat) = each %ReproxyBored) { - $out->("bored $hostip " . scalar(@$dat)); - } - while (my ($hostip, $dat) = each %ReproxyQueues) { - $out->("clients_queued $hostip " . scalar(@$dat)); - } - while (my ($hostip, $dat) = each %ReproxyBackends) { - $out->("backends $hostip " . scalar(@$dat)); - foreach my $be (@$dat) { - $out->("... " . $be->as_string); - } - } - while (my ($hostip, $dat) = each %ReproxyMax) { - $out->("SERVER max_reproxy_connections($hostip) = $dat"); - } - $out->("SERVER max_reproxy_connections = " . ($ReproxyGlobalMax || 0)); - $out->('.'); -} - -1; diff --git a/lib/mogdeps/Perlbal/Service.pm b/lib/mogdeps/Perlbal/Service.pm deleted file mode 100644 index 07d093ba..00000000 --- a/lib/mogdeps/Perlbal/Service.pm +++ /dev/null @@ -1,1686 +0,0 @@ -###################################################################### -# Service class -###################################################################### -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. 
-# - -package Perlbal::Service; -use strict; -use warnings; -no warnings qw(deprecated); - -use Perlbal::BackendHTTP; -use Perlbal::Cache; -use Perlbal::Util; - -use fields ( - 'name', # scalar: name of this service - 'role', # scalar: role type 'web_server', 'reverse_proxy', etc... - 'enabled', # scalar: bool, whether we're enabled or not (enabled = listening) - - 'pool', # Perlbal::Pool that we're using to allocate nodes if we're in proxy mode - 'listener', # Perlbal::TCPListener object, when enabled - 'reproxy_cache', # Perlbal::Cache object, when enabled - - # end-user tunables - 'listen', # scalar IP:port of where we're listening for new connections - 'docroot', # document root for webserver role - 'dirindexing', # bool: directory indexing? (for webserver role) not async. - 'index_files', # arrayref of filenames to try for index files - 'enable_concatenate_get', # bool: if user can request concatenated files - 'enable_put', # bool: whether PUT is supported - 'max_put_size', # int: max size in bytes of a put file - 'max_chunked_request_size', # int: max size in bytes of a chunked request (to be written to disk first) - 'min_put_directory', # int: number of directories required to exist at beginning of URIs in put - 'enable_delete', # bool: whether DELETE is supported - 'high_priority_cookie', # cookie name to check if client can 'cut in line' and get backends faster - 'high_priority_cookie_contents', # aforementioned cookie value must contain this substring - 'backend_persist_cache', # scalar: max number of persistent backends to hold onto while no clients - 'persist_client', # bool: persistent connections for clients - 'persist_backend', # bool: persistent connections for backends - 'verify_backend', # bool: get attention of backend before giving it clients (using OPTIONS) - 'verify_backend_path', # path to check with the OPTIONS request (default *) - 'max_backend_uses', # max requests to send per kept-alive backend (default 0 = unlimited) - 'connect_ahead', # 
scalar: number of spare backends to connect to in advance all the time - 'buffer_size', # int: specifies how much data a ClientProxy object should buffer from a backend - 'buffer_size_reproxy_url', # int: same as above but for backends that are reproxying for us - 'queue_relief_size', # int; number of outstanding standard priority - # connections to activate pressure relief at - 'queue_relief_chance', # int:0-100; % chance to take a standard priority - # request when we're in pressure relief mode - 'trusted_upstream_proxies', # Net::Netmask object containing netmasks for trusted upstreams - 'always_trusted', # bool; if true, always trust upstreams - 'blind_proxy', # bool: if true, do not modify X-Forwarded-For, X-Host, or X-Forwarded-Host headers - 'enable_reproxy', # bool; if true, advertise that server will reproxy files and/or URLs - 'reproxy_cache_maxsize', # int; maximum number of reproxy results to be cached. (0 is disabled and default) - 'client_sndbuf_size', # int: bytes for SO_SNDBUF - 'server_process' , # scalar: path to server process (executable) - 'persist_client_timeout', # int: keep-alive timeout in seconds for clients (default is 30) - - # Internal state: - 'waiting_clients', # arrayref of clients waiting for backendhttp conns - 'waiting_clients_highpri', # arrayref of high-priority clients waiting for backendhttp conns - 'waiting_clients_lowpri', # arrayref of low-priority clients waiting for backendhttp conns - 'waiting_client_count', # number of clients waiting for backends - 'waiting_client_map' , # map of clientproxy fd -> 1 (if they're waiting for a conn) - 'pending_connects', # hashref of "ip:port" -> $time (only one pending connect to backend at a time) - 'pending_connect_count', # number of outstanding backend connects - 'bored_backends', # arrayref of backends we've already connected to, but haven't got clients - 'hooks', # hashref: hookname => [ [ plugin, ref ], [ plugin, ref ], ... 
] - 'plugins', # hashref: name => 1 - 'plugin_order', # arrayref: name, name, name... - 'plugin_setters', # hashref: { plugin_name => { key_name => coderef } } - 'extra_config', # hashref: extra config options; name => values - 'spawn_lock', # bool: if true, we're currently in spawn_backends - 'extra_headers', # { insert => [ [ header, value ], ... ], remove => [ header, header, ... ], - # set => [ [ header, value ], ... ] }; used in header management interface - 'generation', # int; generation count so we can slough off backends from old pools - 'backend_no_spawn', # { "ip:port" => 1 }; if on, spawn_backends will ignore this ip:port combo - 'buffer_backend_connect', # 0 for of, else, number of bytes to buffer before we ask for a backend - 'selector', # CODE ref, or undef, for role 'selector' services - 'default_service', # Perlbal::Service; name of a service a selector should default to - 'buffer_uploads', # bool; enable/disable the buffered uploads to disk system - 'buffer_uploads_path', # string; path to store buffered upload files - 'buffer_upload_threshold_time', # int; buffer uploads estimated to take longer than this - 'buffer_upload_threshold_size', # int; buffer uploads greater than this size (in bytes) - 'buffer_upload_threshold_rate', # int; buffer uploads uploading at less than this rate (in bytes/sec) - - 'upload_status_listeners', # string: comma separated list of ip:port of UDP upload status receivers - 'upload_status_listeners_sockaddr', # arrayref of sockaddrs (packed ip/port) - - 'enable_ssl', # bool: whether this service speaks SSL to the client - 'ssl_key_file', # file: path to key pem file - 'ssl_cert_file', # file: path to key pem file - 'ssl_cipher_list', # OpenSSL cipher list string - - 'enable_error_retries', # bool: whether we should retry requests after errors - 'error_retry_schedule', # string of comma-separated seconds (full or partial) to delay between retries - 'latency', # int: milliseconds of latency to add to request - - # stats: - 
'_stat_requests', # total requests to this service - '_stat_cache_hits', # total requests to this service that were served via the reproxy-url cache - ); - -# hash; 'role' => coderef to instantiate a client for this role -our %PluginRoles; - -our $tunables = { - - 'role' => { - des => "What type of service. One of 'reverse_proxy' for a service that load balances to a pool of backend webserver nodes, 'web_server' for a typical webserver', 'management' for a Perlbal management interface (speaks both command-line or HTTP, auto-detected), or 'selector', for a virtual service that maps onto other services.", - required => 1, - - check_type => sub { - my ($self, $val, $errref) = @_; - return 0 unless $val; - return 1 if $val =~ /^(?:reverse_proxy|web_server|management|selector|upload_tracker)$/; - return 1 if $PluginRoles{$val}; - $$errref = "Role not valid for service $self->{name}"; - return 0; - }, - check_role => '*', - setter => sub { - my ($self, $val, $set, $mc) = @_; - my $rv = $set->(); - $self->init; # now that service role is set - return $rv; - }, - }, - - 'listen' => { - check_role => "*", - des => "The ip:port to listen on. For a service to work, you must either make it listen, or make another selector service map to a non-listening service.", - check_type => ["regexp", qr/^(\d+\.\d+\.\d+\.\d+:)?\d+$/, - "Listen argument must be ip:port or port. " . - "e.g. 
192.168.0.1:80 or 81"], - setter => sub { - my ($self, $val, $set, $mc) = @_; - - # close/reopen listening socket - if ($val ne ($self->{listen} || "") && $self->{enabled}) { - $self->disable(undef, "force"); - $self->{listen} = $val; - $self->enable(undef); - } - - return $set->(); - }, - }, - - 'backend_persist_cache' => { - des => "The number of backend connections to keep alive on reserve while there are no clients.", - check_type => "int", - default => 2, - check_role => "reverse_proxy", - }, - - 'persist_client' => { - des => "Whether to enable HTTP keep-alives to the end user.", - default => 0, - check_type => "bool", - check_role => "*", - }, - - 'persist_backend' => { - des => "Whether to enable HTTP keep-alives to the backend webnodes. (Off by default, but highly recommended if Perlbal will be the only client to your backends. If not, beware that Perlbal will hog the connections, starving other clients.)", - default => 0, - check_type => "bool", - check_role => "reverse_proxy", - }, - - 'verify_backend' => { - des => "Whether Perlbal should send a quick OPTIONS request to the backends before sending an actual client request to them. If your backend is Apache or some other process-based webserver, this is HIGHLY recommended. All too often a loaded backend box will reply to new TCP connections, but it's the kernel's TCP stack Perlbal is talking to, not an actual Apache process yet. Using this option reduces end-user latency a ton on loaded sites.", - default => 0, - check_type => "bool", - check_role => "reverse_proxy", - }, - - 'verify_backend_path' => { - des => "What path the OPTIONS request sent by verify_backend should use. Default is '*'.", - default => '*', - check_role => "reverse_proxy", - }, - - 'max_backend_uses' => { - check_role => "reverse_proxy", - des => "The max number of requests to be made on a single persistent backend connection before releasing the connection. 
The default value of 0 means no limit, and the connection will only be discarded once the backend asks it to be, or when Perlbal is sufficiently idle.", - default => 0, - }, - - 'max_put_size' => { - default => 0, # no limit - des => "The maximum content-length that will be accepted for a PUT request, if enable_put is on. Default value of 0 means no limit.", - check_type => "size", - check_role => "web_server", - }, - - 'max_chunked_request_size' => { - default => 209715200, # 200 MB. (0: no limit) - des => "The maximum size that will be accepted for a chunked request. Default is 200MB (which is written to disk, buffered uploads must be on). A value of 0 means no limit.", - check_type => "size", - check_role => "*", - }, - - 'buffer_size' => { - des => "How much we'll ahead of a client we'll get while copying from a backend to a client. If a client gets behind this much, we stop reading from the backend for a bit.", - default => "256k", - check_type => "size", - check_role => "reverse_proxy", - }, - - 'buffer_size_reproxy_url' => { - des => "How much we'll get ahead of a client we'll get while copying from a reproxied URL to a client. If a client gets behind this much, we stop reading from the reproxied URL for a bit. 
The default is lower than the regular buffer_size (50k instead of 256k) because it's assumed that you're only reproxying to large files on event-based webservers, which are less sensitive to many open connections, whereas the 256k buffer size is good for keeping heavy process-based free of slow clients.", - default => "50k", - check_type => "size", - check_role => "reverse_proxy", - }, - - 'queue_relief_size' => { - default => 0, - check_type => "int", - check_role => "reverse_proxy", - }, - - 'queue_relief_chance' => { - default => 0, - check_type => sub { - my ($self, $val, $errref) = @_; - return 1 if $val =~ /^\d+$/ && $val >= 0 && $val <= 100; - $$errref = "Expecting integer value between 0 and 100, inclusive."; - return 0; - }, - check_role => "reverse_proxy", - }, - - 'buffer_backend_connect' => { - des => "How much content-body (POST/PUT/etc) data we read from a client before we start sending it to a backend web node. If 'buffer_uploads' is enabled, this value is used to determine how many bytes are read before Perlbal makes a determination on whether or not to spool the upload to disk.", - default => '100k', - check_type => "size", - check_role => "reverse_proxy", - }, - - 'docroot' => { - des => "Directory root for web server.", - - check_role => "web_server", - val_modify => sub { my $valref = shift; $$valref =~ s!/$!!; }, - check_type => sub { - my ($self, $val, $errref) = @_; - #FIXME: require absolute paths? - return 1 if $val && -d $val; - $$errref = "Directory not found for service $self->{name}"; - return 0; - }, - }, - - 'enable_put' => { - des => "Enable HTTP PUT requests.", - default => 0, - check_role => "web_server", - check_type => "bool", - }, - - 'enable_delete' => { - des => "Enable HTTP DELETE requests.", - default => 0, - check_role => "web_server", - check_type => "bool", - }, - - 'enable_reproxy' => { - des => "Enable 'reproxying' (end-user-transparent internal redirects) to either local files or other URLs. 
When enabled, the backend servers in the pool that this service is configured for will have access to tell this Perlbal instance to serve any local readable file, or connect to any other URL that this Perlbal can connect to. Only enable this if you trust the backend web nodes.", - default => 0, - check_role => "reverse_proxy", - check_type => "bool", - }, - - 'reproxy_cache_maxsize' => { - des => "Set the maximum number of cached reproxy results (X-REPROXY-CACHE-FOR) that may be kept in the service cache. These cached requests take up about 1.25KB of ram each (on Linux x86), but will vary with usage. Perlbal still starts with 0 in the cache and will grow over time. Be careful when adjusting this and watch your ram usage like a hawk.", - default => 0, - check_role => "reverse_proxy", - check_type => "int", - setter => sub { - my ($self, $val, $set, $mc) = @_; - if ($val) { - $self->{reproxy_cache} ||= Perlbal::Cache->new(maxsize => 1); - $self->{reproxy_cache}->set_maxsize($val); - } else { - $self->{reproxy_cache} = undef; - } - return $mc->ok; - }, - }, - - 'upload_status_listeners' => { - des => "Comma separated list of hosts in form 'a.b.c.d:port' which will receive UDP upload status packets no faster than once a second per HTTP request (PUT/POST) from clients that have requested an upload status bar, which they request by appending the URL get argument ?client_up_session=[xxxxxx] where xxxxx is 5-50 'word' characters (a-z, A-Z, 0-9, underscore).", - default => "", - check_role => "reverse_proxy", - check_type => sub { - my ($self, $val, $errref) = @_; - my @packed; - foreach my $ipa (grep { $_ } split(/\s*,\s*/, $val)) { - unless ($ipa =~ /^(\d+\.\d+\.\d+\.\d+):(\d+)$/) { - $$errref = "Invalid UDP endpoint: \"$ipa\". 
Must be of form a.b.c.d:port"; - return 0; - } - push @packed, scalar Socket::sockaddr_in($2, Socket::inet_aton($1)); - } - $self->{upload_status_listeners_sockaddr} = \@packed; - return 1; - }, - }, - - 'min_put_directory' => { - des => "If PUT requests are enabled, require this many levels of directories to already exist. If not, fail.", - default => 0, # no limit - check_role => "web_server", - check_type => "int", - }, - - 'dirindexing' => { - des => "Show directory indexes when an HTTP request is for a directory. Warning: this is not an async operation, so will slow down Perlbal on heavily loaded sites.", - default => 0, - check_role => "web_server", - check_type => "bool", - }, - - 'enable_concatenate_get' => { - des => "Enable Perlbal's multiple-files-in-one-request mode, where a client have use a comma-separated list of files to return, always in text/plain. Useful for web apps which have dozens/hundreds of tiny css/js files, and don't trust browsers/etc to do pipelining. Decreases overall round-trip latency a bunch, but requires app to be modified to support it. See t/17-concat.t test for details.", - default => 0, - check_role => "web_server", - check_type => "bool", - }, - - 'connect_ahead' => { - des => "How many extra backend connections we keep alive in addition to the current ones, in anticipation of new client connections.", - default => 0, - check_type => "int", - check_role => "reverse_proxy", - setter => sub { - my ($self, $val, $set, $mc) = @_; - my $rv = $set->(); - $self->spawn_backends if $self->{enabled}; - return $rv; - }, - }, - - 'always_trusted' => { - des => "Whether to trust all incoming requests' X-Forwarded-For and related headers. 
Set to true only if you know that all incoming requests from your own proxy servers that clean/set those headers.", - default => 0, - check_type => "bool", - check_role => "*", - }, - - 'blind_proxy' => { - des => "Flag to disable any modification of X-Forwarded-For, X-Host, and X-Forwarded-Host headers.", - default => 0, - check_type => "bool", - check_role => "reverse_proxy", - }, - - 'high_priority_cookie' => { - des => "The cookie name to inspect to determine if the client goes onto the high-priority queue.", - check_role => "reverse_proxy", - }, - - 'high_priority_cookie_contents' => { - des => "A string that the high_priority_cookie must contain to go onto the high-priority queue.", - check_role => "reverse_proxy", - }, - - 'trusted_upstream_proxies' => { - des => "A Net::Netmask filter (e.g. 10.0.0.0/24, see Net::Netmask) that determines whether upstream clients are trusted or not, where trusted means their X-Forwarded-For/etc headers are not munged.", - check_role => "*", - check_type => sub { - my ($self, $val, $errref) = @_; - unless (my $loaded = eval { require Net::Netmask; 1; }) { - $$errref = "Net::Netmask not installed"; - return 0; - } - - return 1 if $self->{trusted_upstream_proxies} = Net::Netmask->new2($val); - $$errref = "Error defining trusted upstream proxies: " . Net::Netmask::errstr(); - return 0; - }, - setter => sub { - my ($self, $val, $set, $mc) = @_; - # Do nothing here, we don't want the default setter because we've - # already set the value in the type_check step. 
- return $mc->ok; - }, - }, - - 'index_files' => { - check_role => "web_server", - default => "index.html", - des => "Comma-separated list of filenames to load when a user visits a directory URL, listed in order of preference.", - setter => sub { - my ($self, $val, $set, $mc) = @_; - $self->{index_files} = [ split(/[\s,]+/, $val) ]; - return $mc->ok; - }, - dumper => sub { - my ($self, $val) = @_; - return join(', ', @$val); - }, - }, - - 'default_service' => { - des => "Name of previously-created service to default requests that aren't matched by a selector plugin to.", - check_role => "selector", - check_type => sub { - my ($self, $val, $errref) = @_; - - my $svc = Perlbal->service($val); - unless ($svc) { - $$errref = "Service '$svc' not found"; - return 0; - } - - $self->{default_service} = $svc; - return 1; - }, - setter => sub { - # override default so we don't set it to the text - return $_[3]->ok; - }, - }, - - 'pool' => { - des => "Name of previously-created pool object containing the backend nodes that this reverse proxy sends requests to.", - check_role => "reverse_proxy", - check_type => sub { - my ($self, $val, $errref) = @_; - my $pl = Perlbal->pool($val); - unless ($pl) { - $$errref = "Pool '$val' not found"; - return 0; - } - $self->{pool}->decrement_use_count if $self->{pool}; - $self->{pool} = $pl; - $self->{pool}->increment_use_count; - $self->{generation}++; - return 1; - }, - setter => sub { - my ($self, $val, $set, $mc) = @_; - # override the default, which is to set "pool" to the - # stringified name of the pool, but we already set it in - # the type-checking phase. instead, we do nothing here. - return $mc->ok; - }, - dumper => sub { - my ($self, $val) = @_; - return $val->name; - } - }, - - 'server_process' => { - des => "Executable which will be the HTTP server on stdin/stdout. (ALPHA, EXPERIMENTAL!)", - check_role => "reverse_proxy", - check_type => sub { - my ($self, $val, $errref) = @_; - #FIXME: require absolute paths? 
- return 1 if $val && -x $val; - $$errref = "Server process ($val) not executable."; - return 0; - }, - }, - - 'persist_client_timeout' => { - des => "Timeout in seconds for HTTP keep-alives to the end user (default is 30)", - check_type => "int", - default => 30, - check_role => "*", - }, - - 'buffer_uploads_path' => { - des => "Directory root for storing files used to buffer uploads.", - - check_role => "reverse_proxy", - val_modify => sub { my $valref = shift; $$valref =~ s!/$!!; }, - check_type => sub { - my ($self, $val, $errref) = @_; - #FIXME: require absolute paths? - return 1 if $val && -d $val; - $$errref = "Directory ($val) not found for service $self->{name} (buffer_uploads_path)"; - return 0; - }, - }, - - 'buffer_uploads' => { - des => "Used to enable or disable the buffer uploads to disk system. If enabled, 'buffer_backend_connect' bytes worth of the upload will be stored in memory. At that point, the buffer upload thresholds will be checked to see if we should just send this upload to the backend, or if we should spool it to disk.", - default => 0, - check_role => "reverse_proxy", - check_type => "bool", - }, - - 'buffer_upload_threshold_time' => { - des => "If an upload is estimated to take more than this number of seconds, it will be buffered to disk. Set to 0 to not check estimated time.", - default => 5, - check_role => "reverse_proxy", - check_type => "int", - }, - - 'buffer_upload_threshold_size' => { - des => "If an upload is larger than this size in bytes, it will be buffered to disk. Set to 0 to not check size.", - default => '250k', - check_role => "reverse_proxy", - check_type => "size", - }, - - 'buffer_upload_threshold_rate' => { - des => "If an upload is coming in at a rate less than this value in bytes per second, it will be buffered to disk. 
Set to 0 to not check rate.", - default => 0, - check_role => "reverse_proxy", - check_type => "int", - }, - - 'latency' => { - des => "Forced latency (in milliseconds) to add to request.", - default => 0, - check_role => "selector", - check_type => "int", - }, - - 'enable_ssl' => { - des => "Enable SSL to the client.", - default => 0, - check_type => "bool", - check_role => "*", - }, - - 'ssl_key_file' => { - des => "Path to private key PEM file for SSL.", - default => "certs/server-key.pem", - check_type => "file_or_none", - check_role => "*", - }, - - 'ssl_cert_file' => { - des => "Path to certificate PEM file for SSL.", - default => "certs/server-cert.pem", - check_type => "file_or_none", - check_role => "*", - }, - - 'ssl_cipher_list' => { - des => "OpenSSL-style cipher list.", - default => "ALL:!LOW:!EXP", - check_role => "*", - }, - - 'enable_error_retries' => { - des => 'Whether Perlbal should transparently retry requests to backends if a backend returns a 500 server error.', - default => 0, - check_type => "bool", - check_role => "reverse_proxy", - }, - - 'error_retry_schedule' => { - des => 'String of comma-separated seconds (full or partial) to delay between retries. For example "0,2" would mean do at most two retries, the first zero seconds after the first failure, and the second 2 seconds after the 2nd failure. 
You probably don\'t need to modify the default value', - default => '0,.25,.50,1,1,1,1,1', - check_role => "reverse_proxy", - }, - - 'client_sndbuf_size' => { - des => "How large to set the client's socket SNDBUF.", - default => 0, - check_type => "size", - check_role => '*', - }, - - -}; -sub autodoc_get_tunables { return $tunables; } - -sub new { - my Perlbal::Service $self = shift; - $self = fields::new($self) unless ref $self; - - my ($name) = @_; - - $self->{name} = $name; - $self->{enabled} = 0; - $self->{extra_config} = {}; - - $self->{backend_no_spawn} = {}; - $self->{generation} = 0; - - $self->{hooks} = {}; - $self->{plugins} = {}; - $self->{plugin_order} = []; - - # track pending connects to backend - $self->{pending_connects} = {}; - $self->{pending_connect_count} = 0; - $self->{bored_backends} = []; - - # waiting clients - $self->{waiting_clients} = []; - $self->{waiting_clients_highpri} = []; - $self->{waiting_clients_lowpri} = []; - $self->{waiting_client_count} = 0; - $self->{waiting_client_map} = {}; - - # buffered upload setup - $self->{buffer_uploads_path} = undef; - - # don't have an object for this yet - $self->{trusted_upstream_proxies} = undef; - - # bare data structure for extra header info - $self->{extra_headers} = { remove => [], insert => [] }; - - # things to watch... - foreach my $v (qw(pending_connects bored_backends waiting_clients - waiting_clients_highpri backend_no_spawn - waiting_client_map - )) { - die "Field '$v' not set" unless $self->{$v}; - Perlbal::track_var("svc-$name-$v", $self->{$v}); - } - - return $self; -} - -# handy instance method to run some manage commands in the context of this service, -# without needing to worry about its name. -# This is intended as an internal API thing, so any output that would have been -# generated is just eaten. 
-sub run_manage_commands { - my ($self, $cmd_block) = @_; - - my $ctx = Perlbal::CommandContext->new; - $ctx->{last_created} = $self->name; - return Perlbal::run_manage_commands($cmd_block, undef, $ctx); -} - -# here's an alternative version of the above that runs a single command -sub run_manage_command { - my ($self, $cmd) = @_; - - my $ctx = Perlbal::CommandContext->new; - $ctx->{last_created} = $self->name; - return Perlbal::run_manage_command($cmd, undef, $ctx); -} - -sub dumpconfig { - my $self = shift; - - my @return; - - my %my_tunables = %$tunables; - - my $dump = sub { - my $setting = shift; - }; - - foreach my $skip (qw(role listen pool)) { - delete $my_tunables{$skip}; - } - - my $role = $self->{role}; - - foreach my $setting ("role", "listen", "pool", sort keys %my_tunables) { - my $attrs = $tunables->{$setting}; - my $value = $self->{$setting}; - - my $check_role = $attrs->{check_role}; - my $check_type = $attrs->{check_type}; - my $default = $attrs->{default}; - my $required = $attrs->{required}; - - next if ($check_role && $check_role ne '*' && $check_role ne $role); - - if ($check_type && $check_type eq 'size') { - $default = $1 if $default =~ /^(\d+)b$/i; - $default = $1 * 1024 if $default =~ /^(\d+)k$/i; - $default = $1 * 1024 * 1024 if $default =~ /^(\d+)m$/i; - } - - if (!$required) { - next unless defined $value; - next if (defined $default && $value eq $default); - } - - if (my $dumper = $attrs->{dumper}) { - $value = $dumper->($self, $value); - } - - if ($check_type && $check_type eq 'bool') { - $value = 'on' if $value; - } - - push @return, "SET $setting = $value"; - } - - my $plugins = $self->{plugins}; - - foreach my $plugin (keys %$plugins) { - local $@; - - my $class = "Perlbal::Plugin::$plugin"; - my $cv = $class->can('dumpconfig'); - - if ($cv) { - eval { push @return, $class->dumpconfig($self) }; - if ($@) { - push @return, "# Plugin '$plugin' threw an exception while being dumped."; - } - } else { - push @return, "# Plugin '$plugin' 
isn't capable of dumping config."; - } - } - - return @return; -} - -# called once a role has been set -sub init { - my Perlbal::Service $self = shift; - die "init called when no role" unless $self->{role}; - - # set all the defaults - for my $param (keys %$tunables) { - my $tun = $tunables->{$param}; - next unless $tun->{check_role} eq "*" || $tun->{check_role} eq $self->{role}; - next unless exists $tun->{default}; - $self->set($param, $tun->{default}); - } -} - -# Service -sub set { - my Perlbal::Service $self = shift; - my ($key, $val, $mc) = @_; - - # if you don't provide an $mc, that better mean you're damn sure it - # won't crash. (end-users never go this route) - $mc ||= Perlbal::ManageCommand->loud_crasher; - - my $set = sub { $self->{$key} = $val; return $mc->ok; }; - - my $pool_set = sub { - # if we don't have a pool, automatically create one named $NAME_pool - unless ($self->{pool}) { - # die if necessary - die "ERROR: Attempt to vivify pool $self->{name}_pool but one or more pools\n" . - " have already been created manually. Please set $key on a\n" . - " previously created pool.\n" unless $Perlbal::vivify_pools; - - # create the pool and ensure that vivify stays on - Perlbal::run_manage_command("CREATE POOL $self->{name}_pool", $mc->out); - Perlbal::run_manage_command("SET $self->{name}.pool = $self->{name}_pool"); - $Perlbal::vivify_pools = 1; - } - - # now we actually do the set - warn "WARNING: '$key' set on service $self->{name} on auto-vivified pool.\n" . - " This behavior is obsolete. This value should be set on a\n" . - " pool object and not on a service.\n" if $Perlbal::vivify_pools; - return $mc->err("No pool defined for service") unless $self->{pool}; - return $self->{pool}->set($key, $val, $mc); - }; - - # this is now handled by Perlbal::Pool, so we pass this set command on - # through in case people try to use it on us like the old method. 
- return $pool_set->() - if $key eq 'nodefile' || - $key eq 'balance_method'; - - my $bool = sub { - my $val = shift; - return 1 if $val =~ /^1|true|on|yes$/i; - return 0 if $val =~ /^0|false|off|no$/i; - return undef; - }; - - if (my $tun = $tunables->{$key}) { - if (my $req_role = $tun->{check_role}) { - return $mc->err("The '$key' option can only be set on a '$req_role' service") - unless ($self->{role}||"") eq $req_role || $req_role eq "*"; - } - - if (my $req_type = $tun->{check_type}) { - if (ref $req_type eq "ARRAY" && $req_type->[0] eq "enum") { - return $mc->err("Value of '$key' must be one of: " . join(", ", @{$req_type->[1]})) - unless grep { $val eq $_ } @{$req_type->[1]}; - } elsif (ref $req_type eq "ARRAY" && $req_type->[0] eq "regexp") { - my $re = $req_type->[1]; - my $emsg = $req_type->[2]; - return $mc->err($emsg) unless $val =~ /$re/; - } elsif (ref $req_type eq "CODE") { - my $emsg = ""; - return $mc->err($emsg) unless $req_type->($self, $val, \$emsg); - } elsif ($req_type eq "bool") { - $val = $bool->($val); - return $mc->err("Expecting boolean value for parameter '$key'") - unless defined $val; - } elsif ($req_type eq "int") { - return $mc->err("Expecting integer value for parameter '$key'") - unless $val =~ /^\d+$/; - } elsif ($req_type eq "size") { - $val = $1 if $val =~ /^(\d+)b$/i; - $val = $1 * 1024 if $val =~ /^(\d+)k$/i; - $val = $1 * 1024 * 1024 if $val =~ /^(\d+)m$/i; - return $mc->err("Expecting size unit value for parameter '$key' in bytes, or suffixed with 'K' or 'M'") - unless $val =~ /^\d+$/; - } elsif ($req_type eq "file") { - return $mc->err("File '$val' not found for '$key'") unless -f $val; - } elsif ($req_type eq "file_or_none") { - return $mc->err("File '$val' not found for '$key'") unless -f $val || $val eq $tun->{default}; - } else { - die "Unknown check_type: $req_type\n"; - } - } - - my $setter = $tun->{setter}; - - if (ref $setter eq "CODE") { - return $setter->($self, $val, $set, $mc); - } elsif 
($tun->{_plugin_inserted}) { - # plugins that add tunables need to be stored in the extra_config hash due to the main object - # using fields. this passthrough is done so the config files don't need to specify this. - $self->{extra_config}->{$key} = $val; - return $mc->ok; - } else { - return $set->(); - } - } - - if ($key eq 'plugins') { - # unload existing plugins - foreach my $plugin (keys %{$self->{plugins}}) { - eval "Perlbal::Plugin::$plugin->unregister(\$self);"; - return $mc->err($@) if $@; - } - - # clear out loaded plugins and hooks - $self->{hooks} = {}; - $self->{plugins} = {}; - $self->{plugin_order} = []; - - # load some plugins - foreach my $plugin (split /[\s,]+/, $val) { - next if $plugin eq 'none'; - - my $fn = Perlbal::plugin_case($plugin); - - next if $self->{plugins}->{$fn}; - unless ($Perlbal::plugins{$fn}) { - $mc->err("Plugin $fn not loaded; not registered for $self->{name}."); - next; - } - - # now register it - eval "Perlbal::Plugin::$fn->register(\$self);"; - return $mc->err($@) if $@; - $self->{plugins}->{$fn} = 1; - push @{$self->{plugin_order}}, $fn; - } - return $mc->ok; - } - - if ($key =~ /^extra\.(.+)$/) { - # set some extra configuration data data - $self->{extra_config}->{$1} = $val; - return $mc->ok; - } - - # see if it happens to be a plugin set command? 
- if ($key =~ /^(.+)\.(.+)$/) { - if (my $coderef = $self->{plugin_setters}->{$1}->{$2}) { - return $coderef->($mc->out, $2, $val); - } - } - - return $mc->err("Unknown service parameter '$key'"); -} - -# CLASS METHOD - -# used by plugins that want to add tunables so that the config file -# can have more options for service settings -sub add_tunable { - my ($name, $hashref) = @_; - return 0 unless $name && $hashref && ref $hashref eq 'HASH'; - return 0 if $tunables->{$name}; - $hashref->{_plugin_inserted} = 1; # mark that a plugin did this - $tunables->{$name} = $hashref; - return 1; -} - -# CLASS METHOD - -# remove a defined tunable, but only if a plugin is what created it -sub remove_tunable { - my $name = shift; - my $tun = $tunables->{$name} or return 0; - return 0 unless $tun->{_plugin_inserted}; - delete $tunables->{$name}; - return 1; -} - -# CLASS METHOD - -# used by plugins to define a new role that services can take on -sub add_role { - my ($role, $creator) = @_; - return 0 unless $role && $creator && ref $creator eq 'CODE'; - return 0 if $PluginRoles{$role}; - $PluginRoles{$role} = $creator; - return 1; -} - -# CLASS METHOD - -# remove a defined plugin role -sub remove_role { - return 0 unless delete $PluginRoles{$_[0]}; - return 1; -} - -# CLASS METHOD - -# returns a defined role creator, if it exists. (undef if it does not) -sub get_role_creator { - return $PluginRoles{$_[0]}; -} - -# run the hooks in a list one by one until one hook returns a true -# value. returns 1 or 0 depending on if any hooks handled the -# request. -sub run_hook { - my Perlbal::Service $self = shift; - my $hook = shift; - if (defined (my $ref = $self->{hooks}->{$hook})) { - # call all the hooks until one returns true - foreach my $hookref (@$ref) { - my $rval = $hookref->[1]->(@_); - return 1 if $rval; - } - } - return 0; -} - -# run a bunch of hooks in this service, always returns undef. 
-sub run_hooks { - my Perlbal::Service $self = shift; - my $hook = shift; - if (defined (my $ref = $self->{hooks}->{$hook})) { - # call all the hooks - $_->[1]->(@_) foreach @$ref; - } - return undef; -} - -# define a hook for this service -sub register_hook { - my Perlbal::Service $self = shift; - my ($pclass, $hook, $ref) = @_; - push @{$self->{hooks}->{$hook} ||= []}, [ $pclass, $ref ]; - return 1; -} - -# remove hooks we have defined -sub unregister_hook { - my Perlbal::Service $self = shift; - my ($pclass, $hook) = @_; - if (defined (my $refs = $self->{hooks}->{$hook})) { - my @new; - foreach my $ref (@$refs) { - # fill @new with hooks that DON'T match - push @new, $ref - unless $ref->[0] eq $pclass; - } - $self->{hooks}->{$hook} = \@new; - return 1; - } - return undef; -} - -# remove all hooks of a certain class -sub unregister_hooks { - my Perlbal::Service $self = shift; - foreach my $hook (keys %{$self->{hooks}}) { - # call unregister_hook with this hook name - $self->unregister_hook($_[0], $hook); - } -} - -# register a value setter for plugin configuration -sub register_setter { - my Perlbal::Service $self = shift; - my ($pclass, $key, $coderef) = @_; - return unless $pclass && $key && $coderef; - $self->{plugin_setters}->{lc $pclass}->{lc $key} = $coderef; -} - -# remove a setter -sub unregister_setter { - my Perlbal::Service $self = shift; - my ($pclass, $key) = @_; - return unless $pclass && $key; - delete $self->{plugin_setters}->{lc $pclass}->{lc $key}; -} - -# remove a bunch of setters -sub unregister_setters { - my Perlbal::Service $self = shift; - my $pclass = shift; - return unless $pclass; - delete $self->{plugin_setters}->{lc $pclass}; -} - -# take a backend we've created and mark it as pending if we do not -# have another pending backend connection in this slot -sub add_pending_connect { - my Perlbal::Service $self = shift; - my Perlbal::BackendHTTP $be = shift; - - # error if we already have a pending connection for this ipport - if (defined 
$self->{pending_connects}{$be->{ipport}}) { - Perlbal::log('warning', "Warning: attempting to spawn backend connection that already existed."); - - # now dump a backtrace so we know how we got here - my $depth = 0; - while (my ($package, $filename, $line, $subroutine) = caller($depth++)) { - Perlbal::log('warning', " -- [$filename:$line] $package::$subroutine"); - } - - # we're done now, just return - return; - } - - # set this connection up in the pending connection list - $self->{pending_connects}{$be->{ipport}} = $be; - $self->{pending_connect_count}++; -} - -# remove a backend connection from the pending connect list if and only -# if it is the actual connection contained in the list; prevent double -# decrementing on accident -sub clear_pending_connect { - my Perlbal::Service $self = shift; - my Perlbal::BackendHTTP $be = shift; - if (defined $self->{pending_connects}{$be->{ipport}} && defined $be && - $self->{pending_connects}{$be->{ipport}} == $be) { - $self->{pending_connects}{$be->{ipport}} = undef; - $self->{pending_connect_count}--; - } -} - -# called by BackendHTTP when it's closed by any means -sub note_backend_close { - my Perlbal::Service $self = shift; - my Perlbal::BackendHTTP $be = shift; - $self->clear_pending_connect($be); - $self->spawn_backends; -} - -# called by ClientProxy when it dies. 
-sub note_client_close { - my Perlbal::Service $self; - my Perlbal::ClientProxy $cp; - ($self, $cp) = @_; - - if (delete $self->{waiting_client_map}{$cp->{fd}}) { - $self->{waiting_client_count}--; - } -} - -sub mark_node_used { - my Perlbal::Service $self = $_[0]; - $self->{pool}->mark_node_used($_[1]) if $self->{pool}; -} - -sub get_client { - my Perlbal::Service $self = shift; - - my $ret = sub { - my Perlbal::ClientProxy $cp = shift; - $self->{waiting_client_count}--; - delete $self->{waiting_client_map}{$cp->{fd}}; - - # before we return, start another round of connections - $self->spawn_backends; - - return $cp; - }; - - # determine if we should jump straight to the high priority queue or - # act as pressure relief on the standard queue - my $hp_first = 1; - if (($self->{queue_relief_size} > 0) && - (scalar(@{$self->{waiting_clients}}) >= $self->{queue_relief_size})) { - # if we're below the chance level, take a standard queue item - $hp_first = 0 - if rand(100) < $self->{queue_relief_chance}; - } - - # find a high-priority client, or a regular one - my Perlbal::ClientProxy $cp; - while ($hp_first && ($cp = shift @{$self->{waiting_clients_highpri}})) { - next if $cp->{closed}; - if (Perlbal::DEBUG >= 2) { - my $backlog = scalar @{$self->{waiting_clients}}; - print "Got from fast queue, in front of $backlog others\n"; - } - return $ret->($cp); - } - - # regular clients: - while ($cp = shift @{$self->{waiting_clients}}) { - next if $cp->{closed}; - print "Backend requesting client, got normal = $cp->{fd}.\n" if Perlbal::DEBUG >= 2; - return $ret->($cp); - } - - # low-priority (batch/idle) clients. 
- while ($cp = shift @{$self->{waiting_clients_lowpri}}) { - next if $cp->{closed}; - print "Backend requesting client, got low priority = $cp->{fd}.\n" if Perlbal::DEBUG >= 2; - return $ret->($cp); - } - - return undef; -} - -# given a backend, verify it's generation -sub verify_generation { - my Perlbal::Service $self = $_[0]; - my Perlbal::BackendHTTP $be = $_[1]; - - # fast cases: generation count matches, so we just return an 'okay!' flag - return 1 if $self->{generation} == $be->generation; - - # if our current pool knows about this ip:port, then we can still use it - if (defined $self->{pool}->node_used($be->ipport)) { - # so we know this is good, in the future we just want to hit the fast case - # and continue, so let's update the generation - $be->generation($self->{generation}); - return 1; - } - - # if we get here, the backend should be closed - $be->close('invalid_generation'); - return 0; -} - -# called by backend connection after it becomes writable -sub register_boredom { - my Perlbal::Service $self; - my Perlbal::BackendHTTP $be; - ($self, $be) = @_; - - # note that this backend is no longer pending a connect, - # if we thought it was before. but not if it's a persistent - # connection asking to be re-used. - unless ($be->{use_count}) { - $self->clear_pending_connect($be); - } - - # it is possible that this backend is part of a different pool that we're - # no longer using... if that's the case, we want to close it - return unless $self->verify_generation($be); - - # now try to fetch a client for it - my Perlbal::ClientProxy $cp = $self->get_client; - if ($cp) { - return if $be->assign_client($cp); - - # don't want to lose client, so we (unfortunately) - # stick it at the end of the waiting queue. - # fortunately, assign_client shouldn't ever fail. 
- $self->request_backend_connection($cp); - } - - # don't hang onto more bored, persistent connections than - # has been configured for connect-ahead - if ($be->{use_count}) { - my $current_bored = scalar @{$self->{bored_backends}}; - if ($current_bored >= $self->{backend_persist_cache}) { - $be->close('too_many_bored'); - return; - } - } - - # put backends which are known to be bound to processes - # and not to TCP stacks at the beginning where they'll - # be used first - if ($be->{has_attention}) { - unshift @{$self->{bored_backends}}, $be; - } else { - push @{$self->{bored_backends}}, $be; - } -} - -sub note_bad_backend_connect { - my Perlbal::Service $self = shift; - my Perlbal::BackendHTTP $be = shift; - my $retry_time = shift(); - - # clear this pending connection - $self->clear_pending_connect($be); - - # mark this host as dead for a while if we need to - if (defined $retry_time && $retry_time > 0) { - # we don't want other spawn_backends calls to retry - $self->{backend_no_spawn}->{$be->{ipport}} = 1; - - # and now we set a callback to ensure we're kicked at the right time - Perlbal::Socket::register_callback($retry_time, sub { - delete $self->{backend_no_spawn}->{$be->{ipport}}; - $self->spawn_backends; - }); - } - - # FIXME: do something interesting (tell load balancer about dead host, - # and fire up a new connection, if warranted) - - # makes a new connection, if needed - $self->spawn_backends; -} - -sub request_backend_connection { # : void - my Perlbal::Service $self; - my Perlbal::ClientProxy $cp; - ($self, $cp) = @_; - - return unless $cp && ! $cp->{closed}; - - my $hi_pri = $cp->{high_priority}; # load values from the client proxy object - my $low_pri = $cp->{low_priority}; # they are initialized as 0 during object creation, but hooks can override them - - # is there a defined high-priority cookie? 
- if (my $cname = $self->{high_priority_cookie}) { - # decide what priority class this request is in - my $hd = $cp->{req_headers}; - my %cookie; - foreach (split(/;\s+/, $hd->header("Cookie") || '')) { - next unless ($_ =~ /(.*)=(.*)/); - $cookie{Perlbal::Util::durl($1)} = Perlbal::Util::durl($2); - } - my $hicookie = $cookie{$cname} || ""; - $hi_pri = index($hicookie, $self->{high_priority_cookie_contents}) != -1; - } - - # now, call hook to see if this should be high priority - $hi_pri = $self->run_hook('make_high_priority', $cp) - unless $hi_pri; # only if it's not already - - # and then, call hook to see about low priority - $low_pri = $self->run_hook('make_low_priority', $cp) - unless $hi_pri || $low_pri; # only if it's not high or low already - - $cp->{high_priority} = 1 if $hi_pri; - $cp->{low_priority} = 1 if $low_pri; - - # before we even consider spawning backends, let's see if we have - # some bored (pre-connected) backends that'd take this client - my Perlbal::BackendHTTP $be; - my $now = time; - while ($be = shift @{$self->{bored_backends}}) { - next if $be->{closed}; - - # now make sure that it's still in our pool, and if not, close it - next unless $self->verify_generation($be); - - # don't use connect-ahead connections when we haven't - # verified we have their attention - if (! 
$be->{has_attention} && $be->{create_time} < $now - 5) { - $be->close("too_old_bored"); - next; - } - - # don't use keep-alive connections if we know the server's - # just about to kill the connection for being idle - if ($be->{disconnect_at} && $now + 2 > $be->{disconnect_at}) { - $be->close("too_close_disconnect"); - next; - } - - # give the backend this client - if ($be->assign_client($cp)) { - # and make some extra bored backends, if configured as such - $self->spawn_backends; - return; - } - - # assign client can end up closing the connection, so check for that - return if $cp->{closed}; - } - - if ($hi_pri) { - push @{$self->{waiting_clients_highpri}}, $cp; - } elsif ($low_pri) { - push @{$self->{waiting_clients_lowpri}}, $cp; - } else { - push @{$self->{waiting_clients}}, $cp; - } - - $self->{waiting_client_count}++; - $self->{waiting_client_map}{$cp->{fd}} = 1; - - $self->spawn_backends; -} - -# sees if it should spawn one or more backend connections -sub spawn_backends { - my Perlbal::Service $self = shift; - - # check our lock and set it if we can - return if $self->{spawn_lock}; - $self->{spawn_lock} = 1; - - # sanity checks on our bookkeeping - if ($self->{pending_connect_count} < 0) { - Perlbal::log('crit', "Bogus: service $self->{name} has pending connect ". - "count of $self->{pending_connect_count}?! Resetting."); - $self->{pending_connect_count} = scalar - map { $_ && ! $_->{closed} } values %{$self->{pending_connects}}; - } - - # keep track of the sum of existing_bored + bored_created - my $backends_created = scalar(@{$self->{bored_backends}}) + $self->{pending_connect_count}; - my $backends_needed = $self->{waiting_client_count} + $self->{connect_ahead}; - my $to_create = $backends_needed - $backends_created; - - my $pool = $self->{pool}; - - # can't create more than this, assuming one pending connect per node - my $max_creatable = $pool ? 
($self->{pool}->node_count - $self->{pending_connect_count}) : 1; - $to_create = $max_creatable if $to_create > $max_creatable; - - # cap number of attempted connects at once - $to_create = 10 if $to_create > 10; - - my $now = time; - - while ($to_create > 0) { - $to_create--; - - # spawn processes if not a pool, else whine. - unless ($pool) { - if (my $sp = $self->{server_process}) { - warn "To create = $to_create...\n"; - warn " spawning $sp\n"; - my $be = Perlbal::BackendHTTP->new_process($self, $sp); - return; - } - warn "No pool! Can't spawn backends.\n"; - return; - } - - my ($ip, $port) = $self->{pool}->get_backend_endpoint; - unless ($ip) { - Perlbal::log('crit', "No backend IP for service $self->{name}"); - # FIXME: register desperate flag, so load-balancer module can callback when it has a node - $self->{spawn_lock} = 0; - return; - } - - # handle retry timeouts so we don't spin - next if $self->{backend_no_spawn}->{"$ip:$port"}; - - # if it's pending, verify the pending one is still valid - if (my Perlbal::BackendHTTP $be = $self->{pending_connects}{"$ip:$port"}) { - my $age = $now - $be->{create_time}; - if ($age >= 5 && $be->{state} eq "connecting") { - $be->close('connect_timeout'); - } elsif ($age >= 60 && $be->{state} eq "verifying_backend") { - # after 60 seconds of attempting to verify, we're probably already dead - $be->close('verify_timeout'); - } elsif (! 
$be->{closed}) { - next; - } - } - - # now actually spawn a backend and add it to our pending list - if (my $be = Perlbal::BackendHTTP->new($self, $ip, $port, { pool => $self->{pool} })) { - $self->add_pending_connect($be); - } - } - - # clear our spawn lock - $self->{spawn_lock} = 0; -} - -# getter only -sub role { - my Perlbal::Service $self = shift; - return $self->{role}; -} - -# called by BackendHTTP to ask if a client's IP is in our trusted list -sub trusted_ip { - my Perlbal::Service $self = shift; - my $ip = shift; - - return 1 if $self->{'always_trusted'}; - - my $tmap = $self->{trusted_upstream_proxies}; - return 0 unless $tmap; - - # try to use it as a Net::Netmask object - return 1 if eval { $tmap->match($ip); }; - return 0; -} - -# manage some header stuff -sub header_management { - my Perlbal::Service $self = shift; - my ($mode, $key, $val, $mc) = @_; - return $mc->err("no header provided") unless $key; - return $mc->err("no value provided") unless $val || $mode eq 'remove'; - return $mc->err("only valid on reverse_proxy services") unless $self->{role} eq 'reverse_proxy'; - - if ($mode eq 'insert') { - push @{$self->{extra_headers}->{insert}}, [ $key, $val ]; - } elsif ($mode eq 'remove') { - push @{$self->{extra_headers}->{remove}}, $key; - } - return $mc->ok; -} - -sub munge_headers { - my Perlbal::Service $self = $_[0]; - my Perlbal::HTTPHeaders $hdrs = $_[1]; - - # handle removals first - foreach my $hdr (@{$self->{extra_headers}->{remove}}) { - $hdrs->header($hdr, undef); - } - - # and now insertions - foreach my $hdr (@{$self->{extra_headers}->{insert}}) { - $hdrs->header($hdr->[0], $hdr->[1]); - } -} - -# getter/setter -sub selector { - my Perlbal::Service $self = shift; - if (@_) { - my $ref = shift; - $self->{selector} = sub { - my $cb = shift; - - # try to give it to our defined selector - my $res = $ref->($cb); - - # if that failed and we have a default, then give it to them - if (!$res && $self->{default_service}) { - 
$self->{default_service}->adopt_base_client($cb); - return 1; - } - - return $res; - }; - } - return $self->{selector}; -} - -# given a base client from a 'selector' role, down-cast it to its specific type -sub adopt_base_client { - my Perlbal::Service $self = shift; - my Perlbal::ClientHTTPBase $cb = shift; - - $cb->{service} = $self; - - if ($self->{'role'} eq "web_server") { - Perlbal::ClientHTTP->new_from_base($cb); - return; - } elsif ($self->{'role'} eq "reverse_proxy") { - Perlbal::ClientProxy->new_from_base($cb); - return; - } elsif ($self->{'role'} eq "selector") { - $self->selector()->($cb); - return; - } else { - $cb->_simple_response(500, "Can't map to service type $self->{'role'}"); - } -} - -# turn a ClientProxy or ClientHTTP back into a generic base client -# (for a service-selector role) -sub return_to_base { - my Perlbal::Service $self = shift; - my Perlbal::ClientHTTPBase $cb = shift; # actually a subclass of Perlbal::ClientHTTPBase - - $cb->{service} = $self; - Perlbal::Util::rebless($cb, "Perlbal::ClientHTTPBase"); - - # the read/watch events are reset by ClientHTTPBase's http_response_sent (our caller) -} - -# Service -sub enable { - my Perlbal::Service $self; - my $mc; - - ($self, $mc) = @_; - - if ($self->{enabled}) { - $mc && $mc->err("service $self->{name} is already enabled"); - return 0; - } - - my $listener; - - # create UDP upload tracker listener - if ($self->{role} eq "upload_tracker") { - $listener = Perlbal::UploadListener->new($self->{listen}, $self); - } - - # create TCP listening socket - if (! $listener && $self->{listen}) { - my $opts = {}; - if ($self->{enable_ssl}) { - $opts->{ssl} = { - SSL_key_file => $self->{ssl_key_file}, - SSL_cert_file => $self->{ssl_cert_file}, - SSL_cipher_list => $self->{ssl_cipher_list}, - }; - return $mc->err("IO::Socket:SSL (0.97+) not available. 
Can't do SSL.") unless eval "use IO::Socket::SSL 0.97 (); 1;"; - return $mc->err("SSL key file ($self->{ssl_key_file}) doesn't exist") unless -f $self->{ssl_key_file}; - return $mc->err("SSL cert file ($self->{ssl_cert_file}) doesn't exist") unless -f $self->{ssl_cert_file}; - } - - my $tl = Perlbal::TCPListener->new($self->{listen}, $self, $opts); - unless ($tl) { - $mc && $mc->err("Can't start service '$self->{name}' on $self->{listen}: $Perlbal::last_error"); - return 0; - } - $listener = $tl; - } - - $self->{listener} = $listener; - $self->{enabled} = 1; - return $mc ? $mc->ok : 1; -} - -# Service -sub disable { - my Perlbal::Service $self; - my ($mc, $force); - - ($self, $mc, $force) = @_; - - if (! $self->{enabled}) { - $mc && $mc->err("service $self->{name} is already disabled"); - return 0; - } - if ($self->{role} eq "management" && ! $force) { - $mc && $mc->err("can't disable management service"); - return 0; - } - - # find listening socket - my $tl = $self->{listener}; - $tl->close if $tl; - $self->{listener} = undef; - $self->{enabled} = 0; - return $mc ? $mc->ok : 1; -} - -sub stats_info -{ - my Perlbal::Service $self = shift; - my $out = shift; - my $now = time; - - $out->("SERVICE $self->{name}"); - $out->(" listening: " . ($self->{listen} || "--")); - $out->(" role: $self->{role}"); - if ($self->{role} eq "reverse_proxy" || - $self->{role} eq "web_server") { - $out->(" pend clients: $self->{waiting_client_count}"); - $out->(" pend backend: $self->{pending_connect_count}"); - foreach my $ipport (sort keys %{$self->{pending_connects}}) { - my $be = $self->{pending_connects}{$ipport}; - next unless $be; - my $age = $now - $be->{create_time}; - $out->(" $ipport - " . ($be->{closed} ? "(closed)" : $be->{state}) . 
" - ${age}s"); - } - } - if ($self->{role} eq "reverse_proxy") { - if ($self->{reproxy_cache}) { - my $hits = $self->{_stat_cache_hits} || 0; - my $hit_rate = sprintf("%0.02f%%", eval { $hits / ($self->{_stat_requests} || 0) * 100 } || 0); - - my $size = eval { $self->{reproxy_cache}->size }; - $size = defined($size) ? $size : 'undef'; - - my $maxsize = eval { $self->{reproxy_cache}->maxsize }; - $maxsize = defined ($maxsize) ? $maxsize : 'undef'; - - my $sizepercent = eval { sprintf("%0.02f%%", $size / $maxsize * 100) } || 'undef'; - - $out->(" cache size: $size/$maxsize ($sizepercent)"); - $out->(" cache hits: $hits"); - $out->("cache hit rate: $hit_rate"); - } - - my $bored_count = scalar @{$self->{bored_backends}}; - $out->(" connect-ahead: $bored_count/$self->{connect_ahead}"); - if ($self->{pool}) { - $out->(" pool: " . $self->{pool}->name); - $out->(" nodes:"); - foreach my $n (@{ $self->{pool}->nodes }) { - my $hostport = "$n->[0]:$n->[1]"; - $out->(sprintf(" %-21s %7d", $hostport, $self->{pool}->node_used($hostport) || 0)); - } - } - } elsif ($self->{role} eq "web_server") { - $out->(" docroot: $self->{docroot}"); - } -} - -# simple passthroughs to the run_hook mechanism. part of the reportto interface. -sub backend_response_received { - return $_[0]->run_hook('backend_response_received', $_[1]); -} - -# just a getter for our name -sub name { - my Perlbal::Service $self = $_[0]; - return $self->{name}; -} - -sub listenaddr { - my Perlbal::Service $self = $_[0]; - return $self->{listen}; -} - -sub reproxy_cache { - my Perlbal::Service $self = $_[0]; - return $self->{reproxy_cache}; -} - -sub add_to_reproxy_url_cache { - my Perlbal::Service $self; - my ($reqhd, $reshd); - - ($self, $reqhd, $reshd) = @_; - - # is caching enabled on this service? 
- my $cache = $self->{reproxy_cache} or - return 0; - - # these should always be set anyway, from BackendHTTP: - my $reproxy_cache_for = $reshd->header('X-REPROXY-CACHE-FOR') or return 0; - my $urls = $reshd->header('X-REPROXY-URL') or return 0; - - my ($timeout_delta, $cache_headers) = split ';', $reproxy_cache_for, 2; - my $timeout = $timeout_delta ? time() + $timeout_delta : undef; - - my $hostname = $reqhd->header("Host") || ''; - my $requri = $reqhd->request_uri || ''; - my $key = "$hostname|$requri"; - - my @headers; - foreach my $header (split /\s+/, $cache_headers) { - my $value; - next unless $header && ($value = $reshd->header($header)); - $value = _ref_to($value) if uc($header) eq 'CONTENT-TYPE'; - push @headers, _ref_to($header), $value; - } - - $cache->set($key, [$timeout, \@headers, $urls]); -} - -# given a string, return a shared reference to that string. to save -# memory when lots of same string is stored. -my %refs; -sub _ref_to { - my $key = shift; - return $refs{$key} || ($refs{$key} = \$key); -} - -1; - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/Socket.pm b/lib/mogdeps/Perlbal/Socket.pm deleted file mode 100644 index 1a5d5a26..00000000 --- a/lib/mogdeps/Perlbal/Socket.pm +++ /dev/null @@ -1,390 +0,0 @@ -# Base class for all socket types -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. - -package Perlbal::Socket; -use strict; -use warnings; -no warnings qw(deprecated); - -use Perlbal::HTTPHeaders; - -use Sys::Syscall; -use POSIX (); - -use Danga::Socket 1.44; -use base 'Danga::Socket'; - -use fields ( - 'headers_string', # headers as they're being read - - 'req_headers', # the final Perlbal::HTTPHeaders object inbound - 'res_headers', # response headers outbound (Perlbal::HTTPHeaders object) - - 'create_time', # creation time - 'alive_time', # last time noted alive - 'state', # general purpose state; used by descendants. 
- 'do_die', # if on, die and do no further requests - - 'read_buf', # arrayref of scalarref read from client - 'read_ahead', # bytes sitting in read_buf - 'read_size', # total bytes read from client, ever - - 'ditch_leading_rn', # if true, the next header parsing will ignore a leading \r\n - - 'observed_ip_string', # if defined, contains the observed IP string of the peer - # we're serving. this is intended for hoding the value of - # the X-Forwarded-For and using it to govern ACLs. - ); - -use constant MAX_HTTP_HEADER_LENGTH => 102400; # 100k, arbitrary - -use constant TRACK_OBJECTS => 0; # see @created_objects below -if (TRACK_OBJECTS) { - use Scalar::Util qw(weaken isweak); -} - -# kick-off one cleanup -_do_cleanup(); - -our %state_changes = (); # { "objref" => [ state, state, state, ... ] } -our $last_callbacks = 0; # time last ran callbacks -our $callbacks = []; # [ [ time, subref ], [ time, subref ], ... ] - -# this one deserves its own section. we keep track of every Perlbal::Socket object -# created if the TRACK_OBJECTS constant is on. we use weakened references, though, -# so this list will hopefully contain mostly undefs. users can ask for this list if -# they want to work with it via the get_created_objects_ref function. -our @created_objects; # ( $ref, $ref, $ref ... ) -our $last_co_cleanup = 0; # clean the list every few seconds - -sub get_statechange_ref { - return \%state_changes; -} - -sub get_created_objects_ref { - return \@created_objects; -} - -sub write_debuggy { - my $self = shift; - - my $cref = $_[0]; - my $content = ref $cref eq "SCALAR" ? $$cref : $cref; - my $clen = defined $content ? length($content) : "undef"; - $content = substr($content, 0, 17) . "..." 
if defined $content && $clen > 30; - my ($pkg, $filename, $line) = caller; - print "write($self, <$clen>\"$content\") from ($pkg, $filename, $line)\n" if Perlbal::DEBUG >= 4; - $self->SUPER::write(@_); -} - -if (Perlbal::DEBUG >= 4) { - *write = \&write_debuggy; -} - -sub new { - my Perlbal::Socket $self = shift; - $self = fields::new( $self ) unless ref $self; - - Perlbal::objctor($self); - - $self->SUPER::new( @_ ); - $self->{headers_string} = ''; - $self->{state} = undef; - $self->{do_die} = 0; - - $self->{read_buf} = []; # arrayref of scalar refs of bufs read from client - $self->{read_ahead} = 0; # bytes sitting in read_buf - $self->{read_size} = 0; # total bytes read from client - - my $now = time; - $self->{alive_time} = $self->{create_time} = $now; - - # now put this item in the list of created objects - if (TRACK_OBJECTS) { - # clean the created objects list if necessary - if ($last_co_cleanup < $now - 5) { - # remove out undefs, because those are natural byproducts of weakening - # references - @created_objects = grep { $_ } @created_objects; - - # however, the grep turned our weak references back into strong ones, so - # we have to re-weaken them - weaken($_) foreach @created_objects; - - # we've cleaned up at this point - $last_co_cleanup = $now; - } - - # now add this one to our cleaned list and weaken it - push @created_objects, $self; - weaken($created_objects[-1]); - } - - return $self; -} - -# FIXME: this doesn't scale in theory, but it might use less CPU in -# practice than using the Heap:: modules and manipulating the -# expirations all the time, thus doing things properly -# algorithmically. and this is definitely less work, so it's worth -# a try. 
-sub _do_cleanup { - my $sf = Perlbal::Socket->get_sock_ref; - - my $now = time; - - my @to_close; - while (my $k = each %$sf) { - my Perlbal::Socket $v = $sf->{$k}; - - my $max_age = eval { $v->max_idle_time } || 0; - next unless $max_age; - - if ($v->{alive_time} < $now - $max_age) { - push @to_close, $v; - } - } - - foreach my $sock (@to_close) { - $sock->close("perlbal_timeout") - } - - Danga::Socket->AddTimer(5, \&_do_cleanup); -} - -# CLASS METHOD: given a delay (in seconds) and a subref, this will call -# that subref in AT LEAST delay seconds. if the subref returns 0, the -# callback is discarded, but if it returns a positive number, the callback -# is pushed onto the callback stack to be called again in at least that -# many seconds. -sub register_callback { - # adds a new callback to our list - my ($delay, $subref) = @_; - push @$callbacks, [ time + $delay, $subref ]; - return 1; -} - -# CLASS METHOD: runs through the list of registered callbacks and executes -# any that need to be executed -# FIXME: this doesn't scale. need a heap. -sub run_callbacks { - my $now = time; - return if $last_callbacks == $now; - $last_callbacks = $now; - - my @destlist = (); - foreach my $ref (@$callbacks) { - # if their time is <= now... - if ($ref->[0] <= $now) { - # find out if they want to run again... - my $rv = $ref->[1]->(); - - # and if they do, push onto list... - push @destlist, [ $rv + $now, $ref->[1] ] - if defined $rv && $rv > 0; - } else { - # not time for this one, just shove it - push @destlist, $ref; - } - } - $callbacks = \@destlist; -} - -# CLASS METHOD: -# default is for sockets to never time out. classes -# can override. 
-sub max_idle_time { 0; } - -# Socket: specific to HTTP socket types (only here and not in -# ClientHTTPBase because ClientManage wants it too) -sub read_request_headers { read_headers($_[0], 0); } -sub read_response_headers { read_headers($_[0], 1); } -sub read_headers { - my Perlbal::Socket $self = shift; - my $is_res = shift; - print "Perlbal::Socket::read_headers($self) is_res=$is_res\n" if Perlbal::DEBUG >= 2; - - my $sock = $self->{sock}; - - my $to_read = MAX_HTTP_HEADER_LENGTH - length($self->{headers_string}); - - my $bref = $self->read($to_read); - unless (defined $bref) { - # client disconnected - print " client disconnected\n" if Perlbal::DEBUG >= 3; - return $self->close('remote_closure'); - } - - $self->{headers_string} .= $$bref; - my $idx = index($self->{headers_string}, "\r\n\r\n"); - my $delim_len = 4; - - # can't find the header delimiter? check for LFLF header delimiter. - if ($idx == -1) { - $idx = index($self->{headers_string}, "\n\n"); - $delim_len = 2; - } - # still can't find the header delimiter? - if ($idx == -1) { - - # usually we get the headers all in one packet (one event), so - # if we get in here, that means it's more than likely the - # extra \r\n and if we clean it now (throw it away), then we - # can avoid a regexp later on. 
- if ($self->{ditch_leading_rn} && $self->{headers_string} eq "\r\n") { - print " throwing away leading \\r\\n\n" if Perlbal::DEBUG >= 3; - $self->{ditch_leading_rn} = 0; - $self->{headers_string} = ""; - return 0; - } - - print " can't find end of headers\n" if Perlbal::DEBUG >= 3; - $self->close('long_headers') - if length($self->{headers_string}) >= MAX_HTTP_HEADER_LENGTH; - return 0; - } - - my $hstr = substr($self->{headers_string}, 0, $idx); - print " pre-parsed headers: [$hstr]\n" if Perlbal::DEBUG >= 3; - - my $extra = substr($self->{headers_string}, $idx+$delim_len); - if (my $len = length($extra)) { - print " pushing back $len bytes after header\n" if Perlbal::DEBUG >= 3; - $self->push_back_read(\$extra); - } - - # some browsers send an extra \r\n after their POST bodies that isn't - # in their content-length. a base class can tell us when they're - # on their 2nd+ request after a POST and tell us to be ready for that - # condition, and we'll clean it up - $hstr =~ s/^\r\n// if $self->{ditch_leading_rn}; - - unless (($is_res ? $self->{res_headers} : $self->{req_headers}) = - Perlbal::HTTPHeaders->new(\$hstr, $is_res)) { - # bogus headers? close connection. - print " bogus headers\n" if Perlbal::DEBUG >= 3; - return $self->close("parse_header_failure"); - } - - print " got valid headers\n" if Perlbal::DEBUG >= 3; - - $Perlbal::reqs++ unless $is_res; - $self->{ditch_leading_rn} = 0; - - return $is_res ? $self->{res_headers} : $self->{req_headers}; -} - -### METHOD: drain_read_buf_to( $destination ) -### Write read-buffered data (if any) from the receiving object to the -### I object. 
-sub drain_read_buf_to { - my ($self, $dest) = @_; - return unless $self->{read_ahead}; - - while (my $bref = shift @{$self->{read_buf}}) { - print "draining readbuf from $self to $dest: [$$bref]\n" if Perlbal::DEBUG >= 3; - $dest->write($bref); - $self->{read_ahead} -= length($$bref); - } -} - -### METHOD: die_gracefully() -### By default, if we're in persist_wait state, close. Else, ignore. Children -### can override if they want to do some other processing. -sub die_gracefully { - my Perlbal::Socket $self = $_[0]; - if ($self->state eq 'persist_wait') { - $self->close('graceful_shutdown'); - } - $self->{do_die} = 1; -} - -### METHOD: write() -### Overridden from Danga::Socket to update our alive time on successful writes -### Stops sockets from being closed on long-running write operations -sub write { - my $self = shift; - - my $ret; - if ($ret = $self->SUPER::write(@_)) { - # Mark this socket alive so we don't time out - $self->{alive_time} = $Perlbal::tick_time; - } - - return $ret; -} - -### METHOD: close() -### Set our state when we get closed. -sub close { - my Perlbal::Socket $self = $_[0]; - $self->state('closed'); - return $self->SUPER::close($_[1]); -} - -### METHOD: state() -### If you pass a parameter, sets the state, else returns it. -sub state { - my Perlbal::Socket $self = shift; - return $self->{state} unless @_; - - push @{$state_changes{"$self"} ||= []}, $_[0] if Perlbal::TRACK_STATES; - return $self->{state} = $_[0]; -} - -sub observed_ip_string { - my Perlbal::Socket $self = shift; - - if (@_) { - return $self->{observed_ip_string} = $_[0]; - } else { - return $self->{observed_ip_string}; - } -} - -sub as_string_html { - my Perlbal::Socket $self = shift; - return $self->SUPER::as_string; -} - -sub DESTROY { - my Perlbal::Socket $self = shift; - delete $state_changes{"$self"} if Perlbal::TRACK_STATES; - Perlbal::objdtor($self); -} - -# package function (not a method). returns bytes sent, or -1 on error. 
-our $sf_defined = Sys::Syscall::sendfile_defined; -our $max_sf_readwrite = 128 * 1024; -sub sendfile { - my ($sfd, $fd, $bytes) = @_; - return Sys::Syscall::sendfile($sfd, $fd, $bytes) if $sf_defined; - - # no support for sendfile. ghetto version: read and write. - my $buf; - $bytes = $max_sf_readwrite if $bytes > $max_sf_readwrite; - - my $rv = POSIX::read($fd, $buf, $bytes); - return -1 unless defined $rv; - return -1 unless $rv == $bytes; - - my $wv = POSIX::write($sfd, $buf, $rv); - return -1 unless defined $wv; - - if (my $over_read = $rv - $wv) { - POSIX::lseek($fd, -$over_read, &POSIX::SEEK_CUR); - } - - return $wv; -} - -1; - - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/SocketSSL.pm b/lib/mogdeps/Perlbal/SocketSSL.pm deleted file mode 100644 index f868c251..00000000 --- a/lib/mogdeps/Perlbal/SocketSSL.pm +++ /dev/null @@ -1,135 +0,0 @@ -# Base class for SSL sockets. -# -# This is a simple class that extends Danga::Socket and contains an IO::Socket::SSL -# for the purpose of allowing non-blocking SSL in Perlbal. -# -# WARNING: this code will break IO::Socket::SSL if you use it in any plugins or -# have custom Perlbal modifications that use it. you will run into issues. This -# is because we override the close method to prevent premature closure of the socket, -# so you will end up with the socket not closing properly. -# -# Copyright 2007, Mark Smith . -# -# This file is licensed under the same terms as Perl itself. - -package Perlbal::SocketSSL; - -use strict; -use warnings; -no warnings qw(deprecated); - -use Danga::Socket 1.44; -use IO::Socket::SSL 0.98; -use Errno qw( EAGAIN ); - -use base 'Danga::Socket'; -use fields qw( listener create_time ); - -# magic IO::Socket::SSL crap to make it play nice with us -{ - no strict 'refs'; - no warnings 'redefine'; - - # replace IO::Socket::SSL::close with our own code... 
- my $orig = *IO::Socket::SSL::close{CODE}; - *IO::Socket::SSL::close = sub { - my $self = shift() - or return IO::Socket::SSL::_invalid_object(); - - # if we have args, close ourselves (second call!), else don't - if (exists ${*$self}->{__close_args}) { - $orig->($self, @{${*$self}->{__close_args}}); - } else { - ${*$self}->{__close_args} = [ @_ ]; - ${*$self}->{_danga_socket}->close('intercepted_ssl_close'); - } - }; -} - -# called: CLASS->new( $sock, $tcplistener ) -sub new { - my Perlbal::SocketSSL $self = shift; - $self = fields::new( $self ) unless ref $self; - - Perlbal::objctor($self); - - my ($sock, $listener) = @_; - - ${*$sock}->{_danga_socket} = $self; - $self->{listener} = $listener; - $self->{create_time} = time; - - $self->SUPER::new($sock); - - # TODO: would be good to have an overall timeout so that we can - # kill sockets that are open and just sitting there. "ssl_handshake_timeout" - # or something like that... - - return $self; -} - -# this is nonblocking, it attempts to setup SSL and if it can't then -# it returns whether it needs to read or write. we then setup to wait -# for the event it indicates and then wait. when that event fires, we -# call down again, and repeat the process until we have setup the -# SSL connection. -sub try_accept { - my Perlbal::SocketSSL $self = shift; - - my $sock = $self->{sock}->accept_SSL; - - if (defined $sock) { - # looks like we got it! let's steal it from ourselves - # so Danga::Socket gives up on it and we can send - # it out to someone else. (we discard the return value - # as we already have it in $sock) - # - # of course, life isn't as simple as that. we have to do - # some trickery with the ordering here to ensure that we - # don't setup the new class until after the Perlbal::SocketSSL - # goes away according to Danga::Socket. 
- # - # if we don't do it this way, we get nasty errors because - # we (this object) still exists in the DescriptorMap of - # Danga::Socket when the new Perlbal::ClientXX tries to - # insert itself there. - - # removes us from the active polling, closes up shop, but - # save our fd first! - my $fd = $self->{fd}; - $self->steal_socket; - - # finish blowing us away - my $ref = Danga::Socket->DescriptorMap(); - delete $ref->{$fd}; - - # now stick the new one in - $self->{listener}->class_new_socket($sock); - return; - } - - # nope, let's see if we can continue the process - if ($! == EAGAIN) { - if ($SSL_ERROR == SSL_WANT_READ) { - $self->watch_read(1); - } elsif ($SSL_ERROR == SSL_WANT_WRITE) { - $self->watch_write(1); - } else { - $self->close('invalid_ssl_state'); - } - } else { - $self->close('invalid_ssl_error'); - } -} - -sub event_read { - $_[0]->watch_read(0); - $_[0]->try_accept; -} - -sub event_write { - $_[0]->watch_write(0); - $_[0]->try_accept; -} - -1; diff --git a/lib/mogdeps/Perlbal/TCPListener.pm b/lib/mogdeps/Perlbal/TCPListener.pm deleted file mode 100644 index b16b81f9..00000000 --- a/lib/mogdeps/Perlbal/TCPListener.pm +++ /dev/null @@ -1,193 +0,0 @@ -###################################################################### -# TCP listener on a given port -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. 
- - -package Perlbal::TCPListener; -use strict; -use warnings; -no warnings qw(deprecated); - -use base "Perlbal::Socket"; -use fields ('service', - 'hostport', - 'sslopts', - 'v6', # bool: IPv6 libraries are available - ); -use Socket qw(IPPROTO_TCP SOL_SOCKET SO_SNDBUF); - -BEGIN { - eval { require Perlbal::SocketSSL }; - if (Perlbal::DEBUG > 0 && $@) { warn "SSL support failed on load: $@\n" } -} - -# TCPListener -sub new { - my Perlbal::TCPListener $self = shift; - my ($hostport, $service, $opts) = @_; - - $self = fields::new($self) unless ref $self; - $opts ||= {}; - - # Were ipv4 or ipv6 explicitly mentioned by syntax? - my $force_v4 = 0; - my $force_v6 = 0; - - my @args; - if ($hostport =~ /^\d+$/) { - @args = ('LocalPort' => $hostport); - } elsif ($hostport =~ /^\d+\.\d+\.\d+\.\d+:/) { - $force_v4 = 1; - @args = ('LocalAddr' => $hostport); - } - - my $v6_errors = ""; - - my $can_v6 = 0; - if (!$force_v4) { - eval "use Danga::Socket 1.61; 1; "; - if ($@) { - $v6_errors = "Danga::Socket 1.61 required for IPv6 support."; - } elsif (!eval { require IO::Socket::INET6; 1 }) { - $v6_errors = "IO::Socket::INET6 required for IPv6 support."; - } else { - $can_v6 = 1; - } - } - - my $socket_class = $can_v6 ? "IO::Socket::INET6" : "IO::Socket::INET"; - $self->{v6} = $can_v6; - - my $sock = $socket_class->new( - @args, - Proto => IPPROTO_TCP, - Listen => 1024, - ReuseAddr => 1, - ); - - return Perlbal::error("Error creating listening socket: " . ($@ || $!)) - unless $sock; - - if ($^O eq 'MSWin32') { - # On Windows, we have to do this a bit differently. - # IO::Socket should really do this for us, but whatever. - my $do = 1; - ioctl($sock, 0x8004667E, \$do) or return Perlbal::error("Unable to make listener on $hostport non-blocking: $!"); - } - else { - # IO::Socket::INET's Blocking => 0 just doesn't seem to work - # on lots of perls. who knows why. 
- IO::Handle::blocking($sock, 0) or return Perlbal::error("Unable to make listener on $hostport non-blocking: $!"); - } - - $self->SUPER::new($sock); - $self->{service} = $service; - $self->{hostport} = $hostport; - $self->{sslopts} = $opts->{ssl}; - $self->watch_read(1); - return $self; -} - -# TCPListener: accepts a new client connection -sub event_read { - my Perlbal::TCPListener $self = shift; - - # accept as many connections as we can - while (my ($psock, $peeraddr) = $self->{sock}->accept) { - IO::Handle::blocking($psock, 0); - - if (my $sndbuf = $self->{service}->{client_sndbuf_size}) { - my $rv = setsockopt($psock, SOL_SOCKET, SO_SNDBUF, pack("L", $sndbuf)); - } - - if (Perlbal::DEBUG >= 1) { - my ($pport, $pipr) = $self->{v6} ? - Socket6::unpack_sockaddr_in6($peeraddr) : - Socket::sockaddr_in($peeraddr); - my $pip = $self->{v6} ? - "[" . Socket6::inet_ntop(Socket6::AF_INET6(), $pipr) . "]" : - Socket::inet_ntoa($pipr); - print "Got new conn: $psock ($pip:$pport) for " . $self->{service}->role . "\n"; - } - - # SSL promotion if necessary - if ($self->{sslopts}) { - # try to upgrade to SSL, this does no IO it just re-blesses - # and prepares the SSL engine for handling us later - IO::Socket::SSL->start_SSL( - $psock, - SSL_server => 1, - SSL_startHandshake => 0, - %{ $self->{sslopts} }, - ); - print " .. 
socket upgraded to SSL!\n" if Perlbal::DEBUG >= 1; - - # safety checking to ensure we got upgraded - return $psock->close - unless ref $psock eq 'IO::Socket::SSL'; - - # class into new package and run with it - my $sslsock = new Perlbal::SocketSSL($psock, $self); - $sslsock->try_accept; - - # all done from our point of view - next; - } - - # puts this socket into the right class - $self->class_new_socket($psock); - } -} - -sub class_new_socket { - my Perlbal::TCPListener $self = shift; - my $psock = shift; - - my $service_role = $self->{service}->role; - if ($service_role eq "reverse_proxy") { - Perlbal::ClientProxy->new($self->{service}, $psock); - } elsif ($service_role eq "management") { - Perlbal::ClientManage->new($self->{service}, $psock); - } elsif ($service_role eq "web_server") { - Perlbal::ClientHTTP->new($self->{service}, $psock); - } elsif ($service_role eq "selector") { - # will be cast to a more specific class later... - Perlbal::ClientHTTPBase->new($self->{service}, $psock, $self->{service}); - } elsif (my $creator = Perlbal::Service::get_role_creator($service_role)) { - # was defined by a plugin, so we want to return one of these - $creator->($self->{service}, $psock); - } -} - -sub as_string { - my Perlbal::TCPListener $self = shift; - my $ret = $self->SUPER::as_string; - my Perlbal::Service $svc = $self->{service}; - $ret .= ": listening on $self->{hostport} for service '$svc->{name}'"; - return $ret; -} - -sub as_string_html { - my Perlbal::TCPListener $self = shift; - my $ret = $self->SUPER::as_string_html; - my Perlbal::Service $svc = $self->{service}; - $ret .= ": listening on $self->{hostport} for service $svc->{name}"; - return $ret; -} - -sub die_gracefully { - # die off so we stop waiting for new connections - my $self = shift; - $self->close('graceful_death'); -} - -1; - - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/Test.pm b/lib/mogdeps/Perlbal/Test.pm deleted 
file mode 100644 index e5bda231..00000000 --- a/lib/mogdeps/Perlbal/Test.pm +++ /dev/null @@ -1,401 +0,0 @@ -#!/usr/bin/perl -w - -package Perlbal::Test; - -=head1 NAME - -Perlbal::Test - Test harness for perlbal server - -=head1 SYNOPSIS - -# my $msock = Perlbal::Test::start_server(); - -=head1 DESCRIPTION - -Perlbal::Test provides access to a perlbal server running on the -local host, for testing purposes. - -The server can be an already-existing server, a child process, or -the current process. - -Various functions are provided to interact with the server. - -=head1 FUNCTIONS - -=cut - -use strict; -use POSIX qw( :sys_wait_h ); -use IO::Socket::INET; -use Socket qw(MSG_NOSIGNAL IPPROTO_TCP TCP_NODELAY SOL_SOCKET); -use HTTP::Response; - -require Exporter; -use vars qw(@ISA @EXPORT); -@ISA = qw(Exporter); -@EXPORT = qw(ua start_server foreach_aio manage filecontent tempdir new_port - manage_multi - mgmt_port wait_on_child dump_res resp_from_sock msock); - -our $i_am_parent = 0; -our $msock; # management sock of child -our $to_kill = 0; -our $mgmt_port; - -our $free_port = 60000; - -=head1 I - -Return the current management port number. - -=cut - -sub mgmt_port { - return $mgmt_port; -} - -END { - manage("shutdown") if $i_am_parent; -} - -=head1 I - -Return a readable string formatted from an HTTP::Response object. -Only the first 80 characters of returned content are returned. - -=cut - -sub dump_res { - my $res = shift; - my ($pkg, $filename, $line) = caller; - my $ret = "$filename:$line ==> "; - unless ($res) { - $ret .= "[response undefined]\n"; - return $ret; - } - my $ct = $res->content; - my $len = length $ct; - if ($len > 80) { - $ct = substr($ct, 0, 80) . "..."; - } - my $status = $res->status_line; - $status =~ s/[\r\n]//g; - return $ret . "status=[$status] content=$len" . "[$ct]\n"; -} - -=head1 I - -Return a newly created temporary directory. The directory will be -removed automatically upon program exit. 
- -=cut - -sub tempdir { - require File::Temp; - return File::Temp::tempdir( CLEANUP => 1 ); -} - -=head1 I - -Return the next free port number in the series. Port numbers are assigned -starting at 60000. - -=cut - -sub new_port { - test_port() ? return $free_port++ : return new_port($free_port++); -} - -=head1 I - -Return 1 if the port is free to use for listening on $free_port else return 0. - -=cut - -sub test_port { - my $sock = IO::Socket::INET->new(LocalPort => $free_port) or return 0; - $sock->close(); - return 1; -} - -=head1 I> - -Return a string containing the contents of the file $file. If $file -cannot be opened, then return undef. - -=cut - -sub filecontent { - my $file = shift; - my $ct; - open (F, $file) or return undef; - $ct = do { local $/; ; }; - close F; - return $ct; -} - -=head1 I - -Set the server into each AIO mode (none, ioaio) and call the specified -callback function with the mode name as argument. - -=cut - -sub foreach_aio (&) { - my $cb = shift; - - foreach my $mode (qw(none ioaio)) { - my $line = manage("SERVER aio_mode = $mode"); - next unless $line; - $cb->($mode); - } -} - -=head1 I - -Send a command $cmd to the server, and return the response line from -the server. - -Optional arguments are: - - quiet_failure => 1 - -Output a warning if the response indicated an error, -unless $opts{quiet_failure} is true, or the command -was 'shutdown' (which doesn't return a response). - -=cut - -sub manage { - my $cmd = shift; - my %opts = @_; - - print $msock "$cmd\r\n"; - my $res = <$msock>; - - if (!$res || $res =~ /^ERR/) { - # Make the result visible in failure cases, unless - # the command was 'shutdown'... cause that never - # returns anything. - warn "Manage command failed: '$cmd' '$res'\n" - unless($opts{quiet_failure} || $cmd eq 'shutdown'); - - return 0; - } - return $res; -} - -=head1 I - -Send a command $cmd to the server, and return a multi-line -response. Return the number zero if there was an error or -no response. 
- -=cut - -sub manage_multi { - my $cmd = shift; - - print $msock "$cmd\r\n"; - my $res; - while (<$msock>) { - last if /^\./; - last if /^ERROR/; - $res .= $_; - } - return 0 if !$res || $res =~ /^ERR/; - return $res; -} - -=head1 I - -Optionally start a perlbal server and return a socket connected to its -management port. - -The argument $conf is a string specifying initial configuration -commands. - -If the environment variable TEST_PERLBAL_FOREGROUND is set to a true -value then a server will be started in the foreground, in which case -this function does not return. When the server function finishes, -exit() will be called to terminate the process. - -If the environment variable TEST_PERLBAL_USE_EXISTING is set to a true -value then a socket will be returned which is connected to an existing -server's management port. - -Otherwise, a child process is forked and a socket is returned which is -connected to the child's management port. - -The management port is assigned automatically, a new port number each -time this function is called. The starting port number is 60000. - -=cut - -sub start_server { - my $conf = shift; - $mgmt_port = new_port(); - - if ($ENV{'TEST_PERLBAL_FOREGROUND'}) { - _start_perbal_server($conf, $mgmt_port); - } - - if ($ENV{'TEST_PERLBAL_USE_EXISTING'}) { - my $msock = wait_on_child(0, $mgmt_port); - return $msock; - } - - my $child = fork; - if ($child) { - $i_am_parent = 1; - $to_kill = $child; - my $msock = wait_on_child($child, $mgmt_port); - my $rv = waitpid($child, WNOHANG); - if ($rv) { - die "Child process (webserver) died.\n"; - } - print $msock "proc\r\n"; - my $spid = undef; - while (<$msock>) { - last if m!^\.\r?\n!; - next unless /^pid:\s+(\d+)/; - $spid = $1; - } - die "Our child was $child, but we connected and it says it's $spid." - unless $child == $spid; - - return $msock; - } - - # child process... 
- _start_perbal_server($conf, $mgmt_port); -} - -# Start a perlbal server running and tell it to listen on the specified -# management port number. This function does not return. - -sub _start_perbal_server { - my ($conf, $mgmt_port) = @_; - - require Perlbal; - - $conf .= qq{ -CREATE SERVICE mgmt -SET mgmt.listen = 127.0.0.1:$mgmt_port -SET mgmt.role = management -ENABLE mgmt -}; - - my $out = sub { print STDOUT "$_[0]\n"; }; - die "Configuration error" unless Perlbal::run_manage_commands($conf, $out); - - unless (Perlbal::Socket->WatchedSockets() > 0) { - die "Invalid configuration. (shouldn't happen?) Stopping (self=$$).\n"; - } - - Perlbal::run(); - exit 0; -} - - -=head1 I - -Return a reference to the socket connected to the server's management -port. - -=cut - -sub msock { - return $msock; -} - - -=head1 I - -Return a new instance of LWP::UserAgent. - -=cut - -sub ua { - require LWP; - require LWP::UserAgent; - return LWP::UserAgent->new; -} - -=head1 I - -Return a socket which is connected to a child process. - -$pid specifies the child process id, and $port is the port number on -which the child is listening. - -Several attempts are made; if the child dies or a connection cannot -be made within 5 seconds then this function dies with an error message. - -=cut - -sub wait_on_child { - my $pid = shift; - my $port = shift; - - my $start = time; - while (1) { - $msock = IO::Socket::INET->new(PeerAddr => "127.0.0.1:$port"); - return $msock if $msock; - select undef, undef, undef, 0.25; - if ($pid && waitpid($pid, WNOHANG) > 0) { - die "Child process (webserver) died.\n"; - } - die "Timeout waiting for port $port to startup" if time > $start + 5; - } -} - -=head1 I - -Read an HTTP response from a socket and return it -as an HTTP::Response object - -In scalar mode, return only the $http_response object. 
- -In array mode, return an array of ($http_response, $firstline) where -$firstline is the first line read from the socket, for example: - -"HTTP/1.1 200 OK" - -=cut - -sub resp_from_sock { - my $sock = shift; - - my $res = ""; - my $firstline = undef; - - while (<$sock>) { - $res .= $_; - $firstline ||= $_; - last if ! $_ || /^\r?\n/; - } - - unless ($firstline) { - print STDERR "Didn't get a firstline in HTTP response.\n"; - return undef; - } - - my $resp = HTTP::Response->parse($res); - return undef unless $resp; - - my $cl = $resp->header('Content-Length'); - if (defined $cl && $cl > 0) { - my $content = ''; - my $rv; - while (($rv = read($sock, $content, $cl)) && - ($cl -= $rv) > 0) { - # don't do anything, the loop is it - } - $resp->content($content); - } - - return wantarray ? ($resp, $firstline) : $resp; -} - -1; diff --git a/lib/mogdeps/Perlbal/Test/WebClient.pm b/lib/mogdeps/Perlbal/Test/WebClient.pm deleted file mode 100644 index 71edbbd4..00000000 --- a/lib/mogdeps/Perlbal/Test/WebClient.pm +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl - -package Perlbal::Test::WebClient; - -use strict; -use IO::Socket::INET; -use Perlbal::Test; -use HTTP::Response; -use Socket qw(MSG_NOSIGNAL IPPROTO_TCP TCP_NODELAY SOL_SOCKET); - -require Exporter; -use vars qw(@ISA @EXPORT $FLAG_NOSIGNAL); -@ISA = qw(Exporter); -@EXPORT = qw(new); - -$FLAG_NOSIGNAL = 0; -eval { $FLAG_NOSIGNAL = MSG_NOSIGNAL; }; - -# create a blank object -sub new { - my $class = shift; - my $self = {}; - bless $self, $class; - return $self; -} - -# get/set what server we should be testing; "ip:port" generally -sub server { - my $self = shift; - if (@_) { - $self->{_sock} = undef; - return $self->{server} = shift; - } else { - return $self->{server}; - } -} - -# get/set what hostname we send with requests -sub host { - my $self = shift; - if (@_) { - $self->{_sock} = undef; - return $self->{host} = shift; - } else { - return $self->{host}; - } -} - -# set which HTTP version to emulate; specify 
'1.0' or '1.1' -sub http_version { - my $self = shift; - if (@_) { - return $self->{http_version} = shift; - } else { - return $self->{http_version}; - } -} - -# set on or off to enable or disable persistent connection -sub keepalive { - my $self = shift; - if (@_) { - $self->{keepalive} = shift() ? 1 : 0; - } - return $self->{keepalive}; -} - -# construct and send a request -sub request { - my $self = shift; - return undef unless $self->{server}; - - my $opts = ref $_[0] eq "HASH" ? shift : {}; - my $opt_headers = delete $opts->{'headers'}; - my $opt_host = delete $opts->{'host'}; - my $opt_method = delete $opts->{'method'}; - my $opt_content = delete $opts->{'content'}; - my $opt_extra_rn = delete $opts->{'extra_rn'}; - my $opt_return_reader = delete $opts->{'return_reader'}; - my $opt_post_header_pause = delete $opts->{'post_header_pause'}; - die "Bogus options: " . join(", ", keys %$opts) if %$opts; - - my $cmds = join(',', map { eurl($_) } @_); - return undef unless $cmds; - - # keep-alive header if 1.0, also means add content-length header - my $headers = ''; - if ($self->{keepalive}) { - $headers .= "Connection: keep-alive\r\n"; - } else { - $headers .= "Connection: close\r\n"; - } - - if ($opt_headers) { - $headers .= $opt_headers; - } - - if (my $hostname = $opt_host || $self->{host}) { - $headers .= "Host: $hostname\r\n"; - } - my $method = $opt_method || "GET"; - my $body = ""; - - if ($opt_content) { - $headers .= "Content-Length: " . length($opt_content) . 
"\r\n"; - $body = $opt_content; - } - - if ($opt_extra_rn) { - $body .= "\r\n"; # some browsers on POST send an extra \r\n that's not part of content-length - } - - my $send = "$method /$cmds HTTP/$self->{http_version}\r\n$headers\r\n"; - - unless ($opt_post_header_pause) { - $send .= $body; - } - - my $len = length $send; - - # send setup - my $rv; - my $sock = delete $self->{_sock}; - local $SIG{'PIPE'} = "IGNORE" unless $FLAG_NOSIGNAL; - - ### send it cached - if ($sock) { - $rv = send($sock, $send, $FLAG_NOSIGNAL); - if ($! || ! defined $rv) { - undef $self->{_sock}; - } elsif ($rv != $len) { - return undef; - } - } - - # failing that, send it through a new socket - unless ($rv) { - $self->{_reqdone} = 0; - - $sock = IO::Socket::INET->new( - PeerAddr => $self->{server}, - Timeout => 3, - ) or return undef; - setsockopt($sock, IPPROTO_TCP, TCP_NODELAY, pack("l", 1)) or die "failed to set sockopt: $!\n"; - - $rv = send($sock, $send, $FLAG_NOSIGNAL); - if ($! || $rv != $len) { - return undef; - } - } - - if ($opt_post_header_pause) { - select undef, undef, undef, $opt_post_header_pause; - my $len = length $body; - if ($len) { - my $rv = send($sock, $body, $FLAG_NOSIGNAL); - if ($! || ! defined $rv) { - undef $self->{_sock}; - } elsif ($rv != $len) { - return undef; - } - } - } - - my $parse_it = sub { - my ($resp, $firstline) = resp_from_sock($sock); - - my $conhdr = $resp->header("Connection") || ""; - if (($firstline =~ m!\bHTTP/1\.1\b! && $conhdr !~ m!\bclose\b!i) || - ($firstline =~ m!\bHTTP/1\.0\b! 
&& $conhdr =~ m!\bkeep-alive\b!i)) { - $self->{_sock} = $sock; - $self->{_reqdone}++; - } else { - $self->{_reqdone} = 0; - } - - return $resp; - }; - - if ($opt_return_reader) { - return $parse_it; - } else { - return $parse_it->(); - } -} - -sub reqdone { - my $self = shift; - return $self->{_reqdone}; -} - -# general purpose URL escaping function -sub eurl { - my $a = $_[0]; - $a =~ s/([^a-zA-Z0-9_\,\-.\/\\\: ])/uc sprintf("%%%02x",ord($1))/eg; - $a =~ tr/ /+/; - return $a; -} - -1; diff --git a/lib/mogdeps/Perlbal/Test/WebServer.pm b/lib/mogdeps/Perlbal/Test/WebServer.pm deleted file mode 100644 index 5d154482..00000000 --- a/lib/mogdeps/Perlbal/Test/WebServer.pm +++ /dev/null @@ -1,264 +0,0 @@ -#!/usr/bin/perl - -package Perlbal::Test::WebServer; - -use strict; -use IO::Socket::INET; -use HTTP::Request; -use Socket qw(MSG_NOSIGNAL IPPROTO_TCP TCP_NODELAY SOL_SOCKET); -use Perlbal::Test; - -use Perlbal::Test::WebClient; - -require Exporter; -use vars qw(@ISA @EXPORT); -@ISA = qw(Exporter); -@EXPORT = qw(start_webserver); - -our @webserver_pids; - -my $testpid; # of the test suite's main program, the one running the HTTP client - -END { - # ensure we kill off the webserver - kill 9, @webserver_pids if $testpid && $testpid == $$; -} - - -sub start_webserver { - my $port = new_port(); - - # dummy mode - if ($ENV{'TEST_PERLBAL_USE_EXISTING'}) { - return $port; - } - - $testpid = $$; - - if (my $child = fork) { - # i am parent, wait for child to startup - push @webserver_pids, $child; - my $sock = wait_on_child($child, $port); - die "Unable to spawn webserver on port $port\n" - unless $sock; - print $sock "GET /reqdecr,status HTTP/1.0\r\n\r\n"; - my $line = <$sock>; - die "Didn't get 200 OK: " . (defined $line ? 
$line : "(undef)") - unless $line && $line =~ /200 OK/; - return $port; - } - - # i am child, start up - my $ssock = IO::Socket::INET->new(LocalPort => $port, ReuseAddr => 1, Listen => 3) - or die "Unable to start socket: $!\n"; - while (my $csock = $ssock->accept) { - exit 0 unless $csock; - fork and next; # parent starts waiting for next request - setsockopt($csock, IPPROTO_TCP, TCP_NODELAY, pack("l", 1)) or die; - serve_client($csock); - } -} - -sub serve_client { - my $csock = shift; - my $req_num = 0; - my $did_options = 0; - my @reqs; - - REQ: - while (1) { - my $req = ''; - my $clen = undef; - while (<$csock>) { - $req .= $_; - if (/^content-length:\s*(\d+)/i) { $clen = $1; }; - last if ! $_ || /^\r?\n/; - } - exit 0 unless $req; - - # parse out things we want to have - my @cmds; - my $httpver = 0; # 0 = 1.0, 1 = 1.1, undef = neither - my $method; - if ($req =~ m!^([A-Z]+) /?(\S+) HTTP/(1\.\d+)\r?\n?!) { - $method = $1; - my $cmds = durl($2); - @cmds = split(/\s*,\s*/, $cmds); - $req_num++; - $httpver = ($3 eq '1.0' ? 0 : ($3 eq '1.1' ? 1 : undef)); - } - my $msg = HTTP::Request->parse($req); - my $keeping_alive = undef; - - my $body; - if ($clen) { - die "Can't read a body on a GET or HEAD" if $method =~ /^GET|HEAD$/; - my $read = read $csock, $body, $clen; - die "Didn't read $clen bytes. Got $read." if $clen != $read; - } - - my $response = sub { - my %opts = @_; - my $code = delete $opts{code}; - my $codetext = delete $opts{codetext}; - my $content = delete $opts{content}; - my $ctype = delete $opts{type}; - my $extra_hdr = delete $opts{headers}; - die "unknown data in opts: %opts" if %opts; - - $extra_hdr ||= ''; - $code ||= $content ? 
200 : 200; - $codetext ||= { 200 => 'OK', 500 => 'Internal Server Error', 204 => "No Content" }->{$code}; - $content ||= ""; - - my $clen = length $content; - $ctype ||= "text/plain" unless $code == 204; - $extra_hdr .= "Content-Type: $ctype\r\n" if $ctype; - - my $hdr_keepalive = ""; - - unless (defined $keeping_alive) { - my $hdr_connection = $msg->header('Connection') || ''; - if ($httpver == 1) { - if ($hdr_connection =~ /\bclose\b/i) { - $keeping_alive = 0; - } else { - $keeping_alive = "1.1implicit"; - } - } - if ($httpver == 0 && $hdr_connection =~ /\bkeep-alive\b/i) { - $keeping_alive = "1.0keepalive"; - } - } - - if ($keeping_alive) { - $hdr_keepalive = "Connection: keep-alive\n"; - } else { - $hdr_keepalive = "Connection: close\n"; - } - - return "HTTP/1.0 $code $codetext\r\n" . - $hdr_keepalive . - "Content-Length: $clen\r\n" . - $extra_hdr . - "\r\n" . - "$content"; - }; - - my $send = sub { - my $res = shift; - print $csock $res; - exit 0 unless $keeping_alive; - }; - - # 500 if no commands were given or we don't know their HTTP version - # or we didn't parse a proper HTTP request - unless (@cmds && defined $httpver && $msg) { - print STDERR "500 response!\n"; - $send->($response->(code => 500)); - next REQ; - } - - if ($method eq "OPTIONS") { - $did_options = 1; - $send->($response->(code => 200)); - next REQ; - } - - # prepare a simple 200 to send; undef this if you want to control - # your own output below - my $to_send; - - foreach my $cmd (@cmds) { - $cmd =~ s/^\s+//; - $cmd =~ s/\s+$//; - - if ($cmd =~ /^sleep:([\d\.]+)$/i) { - my $sleeptime = $1; - #print "I, $$, should sleep for $sleeptime.\n"; - use Time::HiRes; - my $t1 = Time::HiRes::time(); - select undef, undef, undef, $1; - my $t2 = Time::HiRes::time(); - my $td = $t2 - $t1; - #print "I, $$, slept for $td\n"; - } - - if ($cmd =~ /^keepalive:([01])$/i) { - $keeping_alive = $1; - } - - if ($cmd eq "status") { - my $len = $clen || 0; - my $bu = 
$msg->header('X-PERLBAL-BUFFERED-UPLOAD-REASON') || ''; - $to_send = $response->(content => - "pid = $$\nreqnum = $req_num\nmethod = $method\n". - "length = $len\nbuffered = $bu\noptions = $did_options\n"); - } - - if ($cmd eq "reqdecr") { - $req_num--; - } - - if ($cmd =~ /^kill:(\d+):(\w+)$/) { - kill $2, $1; - } - - if ($cmd =~ /^reproxy_url:(.+)/i) { - $to_send = $response->(headers => "X-Reproxy-URL: $1\r\n", - code => 204, - ); - } - - if ($cmd =~ /^reproxy_url204:(.+)/i) { - $to_send = $response->(headers => "X-Reproxy-URL: $1\r\n"); - } - - if ($cmd =~ /^reproxy_url_cached:(\d+):(.+)/i) { - kill 'USR1', $testpid; - $to_send = $response->(headers => - "X-Reproxy-URL: $2\r\nX-Reproxy-Cache-For: $1; Last-Modified Content-Type\r\n"); - } - - if ($cmd =~ /^reproxy_url_multi:((?:\d+:){2,})(\S+)/i) { - my $ports = $1; - my $path = $2; - my @urls; - foreach my $port (split(/:/, $ports)) { - push @urls, "http://127.0.0.1:$port$path"; - } - $to_send = $response->(headers => "X-Reproxy-URL: @urls\r\n"); - } - - if ($cmd =~ /^reproxy_file:(.+)/i) { - $to_send = $response->(headers => "X-Reproxy-File: $1\r\n"); - } - - if ($cmd =~ /^subreq:(\d+)$/) { - my $port = $1; - my $wc = Perlbal::Test::WebClient->new; - $wc->server("127.0.0.1:$port"); - $wc->keepalive(0); - $wc->http_version('1.0'); - my $resp = $wc->request("status"); - my $subpid; - if ($resp && $resp->content =~ /^pid = (\d+)$/m) { - $subpid = $1; - } - $to_send = $response->(content => "pid = $$\nsubpid = $subpid\nreqnum = $req_num\n"); - } - } - - $send->($to_send || $response->()); - } # while(1) -} - -# de-url escape -sub durl { - my ($a) = @_; - $a =~ tr/+/ /; - $a =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; - return $a; -} - -1; diff --git a/lib/mogdeps/Perlbal/UploadListener.pm b/lib/mogdeps/Perlbal/UploadListener.pm deleted file mode 100644 index 062f7ef6..00000000 --- a/lib/mogdeps/Perlbal/UploadListener.pm +++ /dev/null @@ -1,107 +0,0 @@ 
-###################################################################### -# Listen for UDP upload status packets -# -# Copyright 2005-2007, Six Apart, Ltd. - - -package Perlbal::UploadListener; -use strict; -use warnings; -no warnings qw(deprecated); - -use base "Perlbal::Socket"; -use fields qw(service hostport); - -# TCPListener -sub new { - my ($class, $hostport, $service) = @_; - - my $sock = - IO::Socket::INET->new( - LocalAddr => $hostport, - Proto => "udp", - ReuseAddr => 1, - Blocking => 0, - ); - - return Perlbal::error("Error creating listening socket: " . ($@ || $!)) - unless $sock; - my $self = fields::new($class); - $self->SUPER::new($sock); - $self->{service} = $service; - $self->{hostport} = $hostport; - $self->watch_read(1); - return $self; -} - -my %status; -my @todelete; - -sub get_status { - my $ses = shift; - return $status{$ses}; -} - -# TCPListener: accepts a new client connection -sub event_read { - my Perlbal::TCPListener $self = shift; - - my $buf; - $self->{sock}->recv($buf, 500); - return unless $buf =~ /^UPLOAD:(\w{5,50}):(\d+):(\d+):(\d+):(\d+)$/; - my ($ses, $done, $total, $starttime, $nowtime) = ($1, $2, $3, $4, $5); - - my $now = time(); - - $status{$ses} = { - done => $done, - total => $total, - starttime => $starttime, - lasttouch => $now, - }; - - # keep a history of touched records, then we'll clean 'em - # after 30 seconds. 
- push @todelete, [$now, $ses]; - my $too_old = $now - 4; - while (@todelete && $todelete[0][0] < $too_old) { - my $rec = shift @todelete; - my $to_kill = $rec->[1]; - if (my $krec = $status{$to_kill}) { - my $last_touch = $krec->{lasttouch}; - delete $status{$to_kill} if $last_touch < $too_old; - } - } -} - -sub as_string { - my Perlbal::TCPListener $self = shift; - my $ret = $self->SUPER::as_string; - my Perlbal::Service $svc = $self->{service}; - $ret .= ": listening on $self->{hostport} for service '$svc->{name}'"; - return $ret; -} - -sub as_string_html { - my Perlbal::TCPListener $self = shift; - my $ret = $self->SUPER::as_string_html; - my Perlbal::Service $svc = $self->{service}; - $ret .= ": listening on $self->{hostport} for service $svc->{name}"; - return $ret; -} - -sub die_gracefully { - # die off so we stop waiting for new connections - my $self = shift; - $self->close('graceful_death'); -} - - -1; - - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/Util.pm b/lib/mogdeps/Perlbal/Util.pm deleted file mode 100644 index 17efc599..00000000 --- a/lib/mogdeps/Perlbal/Util.pm +++ /dev/null @@ -1,53 +0,0 @@ -# misc util functions -# - -package Perlbal::Util; -use strict; -use warnings; -no warnings qw(deprecated); - -sub durl { - my ($txt) = @_; - $txt =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; - return $txt; -} - -=head2 C< rebless > - -Safely re-bless a locked (use fields) hash into another package. Note -that for our convenience elsewhere the set of allowable keys for the -re-blessed hash will be the union of the keys allowed by its old package -and those allowed for the package into which it is blessed. 
- -=cut - -BEGIN { - if ($] >= 5.010) { - eval q{ - use Hash::Util qw(legal_ref_keys unlock_ref_keys lock_ref_keys) - }; - *rebless = sub { - my ($obj, $pkg) = @_; - my @keys = legal_ref_keys($obj); - unlock_ref_keys($obj); - bless $obj, $pkg; - lock_ref_keys($obj, @keys, - legal_ref_keys(fields::new($pkg))); - return $obj; - }; - } - else { - *rebless = sub { - my ($obj, $pkg) = @_; - return bless $obj, $pkg; - }; - } -} - -1; - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Sys/Syscall.pm b/lib/mogdeps/Sys/Syscall.pm deleted file mode 100644 index 25272587..00000000 --- a/lib/mogdeps/Sys/Syscall.pm +++ /dev/null @@ -1,335 +0,0 @@ -# LICENSE: You're free to distribute this under the same terms as Perl itself. - -package Sys::Syscall; -use strict; -use POSIX qw(ENOSYS SEEK_CUR); -use Config; - -require Exporter; -use vars qw(@ISA @EXPORT_OK %EXPORT_TAGS $VERSION); - -$VERSION = "0.22"; -@ISA = qw(Exporter); -@EXPORT_OK = qw(sendfile epoll_ctl epoll_create epoll_wait - EPOLLIN EPOLLOUT EPOLLERR EPOLLHUP EPOLLRDBAND - EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD); -%EXPORT_TAGS = (epoll => [qw(epoll_ctl epoll_create epoll_wait - EPOLLIN EPOLLOUT EPOLLERR EPOLLHUP EPOLLRDBAND - EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD)], - sendfile => [qw(sendfile)], - ); - -use constant EPOLLIN => 1; -use constant EPOLLOUT => 4; -use constant EPOLLERR => 8; -use constant EPOLLHUP => 16; -use constant EPOLLRDBAND => 128; -use constant EPOLL_CTL_ADD => 1; -use constant EPOLL_CTL_DEL => 2; -use constant EPOLL_CTL_MOD => 3; - -our $loaded_syscall = 0; - -sub _load_syscall { - # props to Gaal for this! 
- return if $loaded_syscall++; - my $clean = sub { - delete @INC{qw}; - }; - $clean->(); # don't trust modules before us - my $rv = eval { require 'syscall.ph'; 1 } || eval { require 'sys/syscall.ph'; 1 }; - $clean->(); # don't require modules after us trust us - return $rv; -} - -our ($sysname, $nodename, $release, $version, $machine) = POSIX::uname(); - -our ( - $SYS_epoll_create, - $SYS_epoll_ctl, - $SYS_epoll_wait, - $SYS_sendfile, - $SYS_readahead, - ); - -if ($^O eq "linux") { - # whether the machine requires 64-bit numbers to be on 8-byte - # boundaries. - my $u64_mod_8 = 0; - - # if we're running on an x86_64 kernel, but a 32-bit process, - # we need to use the i386 syscall numbers. - if ($machine eq "x86_64" && $Config{ptrsize} == 4) { - $machine = "i386"; - } - - if ($machine =~ m/^i[3456]86$/) { - $SYS_epoll_create = 254; - $SYS_epoll_ctl = 255; - $SYS_epoll_wait = 256; - $SYS_sendfile = 187; # or 64: 239 - $SYS_readahead = 225; - } elsif ($machine eq "x86_64") { - $SYS_epoll_create = 213; - $SYS_epoll_ctl = 233; - $SYS_epoll_wait = 232; - $SYS_sendfile = 40; - $SYS_readahead = 187; - } elsif ($machine eq "ppc64") { - $SYS_epoll_create = 236; - $SYS_epoll_ctl = 237; - $SYS_epoll_wait = 238; - $SYS_sendfile = 186; # (sys32_sendfile). 
sys32_sendfile64=226 (64 bit processes: sys_sendfile64=186) - $SYS_readahead = 191; # both 32-bit and 64-bit vesions - $u64_mod_8 = 1; - } elsif ($machine eq "ppc") { - $SYS_epoll_create = 236; - $SYS_epoll_ctl = 237; - $SYS_epoll_wait = 238; - $SYS_sendfile = 186; # sys_sendfile64=226 - $SYS_readahead = 191; - $u64_mod_8 = 1; - } elsif ($machine eq "ia64") { - $SYS_epoll_create = 1243; - $SYS_epoll_ctl = 1244; - $SYS_epoll_wait = 1245; - $SYS_sendfile = 1187; - $SYS_readahead = 1216; - $u64_mod_8 = 1; - } elsif ($machine eq "alpha") { - # natural alignment, ints are 32-bits - $SYS_sendfile = 370; # (sys_sendfile64) - $SYS_epoll_create = 407; - $SYS_epoll_ctl = 408; - $SYS_epoll_wait = 409; - $SYS_readahead = 379; - $u64_mod_8 = 1; - } else { - # as a last resort, try using the *.ph files which may not - # exist or may be wrong - _load_syscall(); - $SYS_epoll_create = eval { &SYS_epoll_create; } || 0; - $SYS_epoll_ctl = eval { &SYS_epoll_ctl; } || 0; - $SYS_epoll_wait = eval { &SYS_epoll_wait; } || 0; - $SYS_readahead = eval { &SYS_readahead; } || 0; - } - - if ($u64_mod_8) { - *epoll_wait = \&epoll_wait_mod8; - *epoll_ctl = \&epoll_ctl_mod8; - } else { - *epoll_wait = \&epoll_wait_mod4; - *epoll_ctl = \&epoll_ctl_mod4; - } -} - -elsif ($^O eq "freebsd") { - if ($ENV{FREEBSD_SENDFILE}) { - # this is still buggy and in development - $SYS_sendfile = 393; # old is 336 - } -} - -############################################################################ -# sendfile functions -############################################################################ - -unless ($SYS_sendfile) { - _load_syscall(); - $SYS_sendfile = eval { &SYS_sendfile; } || 0; -} - -sub sendfile_defined { return $SYS_sendfile ? 1 : 0; } - -if ($^O eq "linux" && $SYS_sendfile) { - *sendfile = \&sendfile_linux; -} elsif ($^O eq "freebsd" && $SYS_sendfile) { - *sendfile = \&sendfile_freebsd; -} else { - *sendfile = \&sendfile_noimpl; -} - -sub sendfile_noimpl { - $! 
= ENOSYS; - return -1; -} - -# C: ssize_t sendfile(int out_fd, int in_fd, off_t *offset, size_t count) -# Perl: sendfile($write_fd, $read_fd, $max_count) --> $actually_sent -sub sendfile_linux { - return syscall( - $SYS_sendfile, - $_[0] + 0, # fd - $_[1] + 0, # fd - 0, # don't keep track of offset. callers can lseek and keep track. - $_[2] + 0 # count - ); -} - -sub sendfile_freebsd { - my $offset = POSIX::lseek($_[1]+0, 0, SEEK_CUR) + 0; - my $ct = $_[2] + 0; - my $sbytes_buf = "\0" x 8; - my $rv = syscall( - $SYS_sendfile, - $_[1] + 0, # fd (from) - $_[0] + 0, # socket (to) - $offset, - $ct, - 0, # struct sf_hdtr *hdtr - $sbytes_buf, # off_t *sbytes - 0); # flags - return $rv if $rv < 0; - - - my $set = unpack("L", $sbytes_buf); - POSIX::lseek($_[1]+0, SEEK_CUR, $set); - return $set; -} - - -############################################################################ -# epoll functions -############################################################################ - -sub epoll_defined { return $SYS_epoll_create ? 
1 : 0; } - -# ARGS: (size) -- but in modern Linux 2.6, the -# size doesn't even matter (radix tree now, not hash) -sub epoll_create { - return -1 unless defined $SYS_epoll_create; - my $epfd = eval { syscall($SYS_epoll_create, ($_[0]||100)+0) }; - return -1 if $@; - return $epfd; -} - -# epoll_ctl wrapper -# ARGS: (epfd, op, fd, events_mask) -sub epoll_ctl_mod4 { - syscall($SYS_epoll_ctl, $_[0]+0, $_[1]+0, $_[2]+0, pack("LLL", $_[3], $_[2], 0)); -} -sub epoll_ctl_mod8 { - syscall($SYS_epoll_ctl, $_[0]+0, $_[1]+0, $_[2]+0, pack("LLLL", $_[3], 0, $_[2], 0)); -} - -# epoll_wait wrapper -# ARGS: (epfd, maxevents, timeout (milliseconds), arrayref) -# arrayref: values modified to be [$fd, $event] -our $epoll_wait_events; -our $epoll_wait_size = 0; -sub epoll_wait_mod4 { - # resize our static buffer if requested size is bigger than we've ever done - if ($_[1] > $epoll_wait_size) { - $epoll_wait_size = $_[1]; - $epoll_wait_events = "\0" x 12 x $epoll_wait_size; - } - my $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0); - for (0..$ct-1) { - @{$_[3]->[$_]}[1,0] = unpack("LL", substr($epoll_wait_events, 12*$_, 8)); - } - return $ct; -} - -sub epoll_wait_mod8 { - # resize our static buffer if requested size is bigger than we've ever done - if ($_[1] > $epoll_wait_size) { - $epoll_wait_size = $_[1]; - $epoll_wait_events = "\0" x 16 x $epoll_wait_size; - } - my $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0); - for (0..$ct-1) { - # 16 byte epoll_event structs, with format: - # 4 byte mask [idx 1] - # 4 byte padding (we put it into idx 2, useless) - # 8 byte data (first 4 bytes are fd, into idx 0) - @{$_[3]->[$_]}[1,2,0] = unpack("LLL", substr($epoll_wait_events, 16*$_, 12)); - } - return $ct; -} - - -1; -__END__ - -=head1 NAME - -Sys::Syscall - access system calls that Perl doesn't normally provide access to - -=head1 SYNOPSIS - - use Sys::Syscall; - -=head1 DESCRIPTION - -Use epoll, sendfile, from Perl. 
Mostly Linux-only support now, but -more syscalls/OSes planned for future. - -=head1 Exports - -Nothing by default. - -May export: sendfile epoll_ctl epoll_create epoll_wait EPOLLIN EPOLLOUT EPOLLERR EPOLLHUP EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD - -Export tags: :epoll and :sendfile - -=head1 Functions - -=head2 epoll support - -=over 4 - -=item $ok = epoll_defined() - -Returns true if epoll might be available. (caller must still test with epoll_create) - -=item $epfd = epoll_create([ $start_size ]) - -Create a new epoll filedescriptor. Returns -1 if epoll isn't available. - -=item $rv = epoll_ctl($epfd, $op, $fd, $events) - -See manpage for epoll_ctl - -=item $count = epoll_wait($epfd, $max_events, $timeout, $arrayref) - -See manpage for epoll_wait. $arrayref is an arrayref to be modified -with the items returned. The values put into $arrayref are arrayrefs -of [$fd, $state]. - -=back - -=head2 sendfile support - -=over 4 - -=item $ok = sendfile_defined() - -Returns true if sendfile should work on this operating system. - -=item $sent = sendfile($sock_fd, $file_fd, $max_send) - -Sends up to $max_send bytes from $file_fd to $sock_fd. Returns bytes -actually sent, or -1 on error. - -=back - -=head1 COPYRIGHT - -This module is Copyright (c) 2005, 2006 Six Apart, Ltd. - -All rights reserved. - -You may distribute under the terms of either the GNU General Public -License or the Artistic License, as specified in the Perl README file. -If you need more liberal licensing terms, please contact the -maintainer. - -=head1 WARRANTY - -This is free software. IT COMES WITHOUT WARRANTY OF ANY KIND. 
- -=head1 AUTHORS - -Brad Fitzpatrick - diff --git a/mogstored b/mogstored index 82cbf127..d11fc218 100755 --- a/mogstored +++ b/mogstored @@ -14,25 +14,6 @@ use strict; use lib 'lib'; use Mogstored::HTTPServer; -# based on where we found Mogstored::HTTPServer (a pure-perl module), -# add the mogdeps/ subdirectory of that base to our @INC search -# path, where all the misc Mogile dependencies are installed. -BEGIN { - my $libpath; - if (! $ENV{MOGILE_NO_BUILTIN_DEPS} && - ($libpath = $INC{"Mogstored/HTTPServer.pm"}) && - $libpath =~ s!Mogstored/HTTPServer.pm$!!) - { - my $dep_dir = "${libpath}mogdeps"; - push @INC, $dep_dir; - unless (($ENV{PERL5LIB} || "") =~ /$dep_dir/) { - $ENV{PERL5LIB} = join(":", - split(/:/, $ENV{PERL5LIB} || ""), - $dep_dir); - } - } -} - use IO::Socket::INET; use POSIX qw(WNOHANG); use Perlbal 1.73; From 0e94bf4cfbede14e59dcc262de4c1690ef15ce95 Mon Sep 17 00:00:00 2001 From: dormando Date: Tue, 6 Dec 2011 03:27:16 -0800 Subject: [PATCH 122/405] kill a little more dead fsck/Checker code I think "Checker" was renamed to Fsck long before I even worked on this project. --- lib/MogileFS/Worker/Fsck.pm | 28 ---------------------------- lib/MogileFS/Worker/Query.pm | 25 ------------------------- mogilefsd | 4 ++-- 3 files changed, 2 insertions(+), 55 deletions(-) diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index b1c42868..5fb4e24f 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -38,32 +38,6 @@ sub work { # this can be CPU-intensive. let's nice ourselves down. 
POSIX::nice(10); - # - my $running = 0; # start time - my $n_check = 0; # items checked - my $start = sub { - return if $running; - $running = $nowish = time(); - }; - my $stats = sub { - return unless $running; - my $elap = $nowish - $running; - debug(sprintf("In %d secs, %d fids, %0.02f fids/sec\n", $elap, $n_check, ($n_check / ($elap || 1)))); - }; - my $last_beat = 0; - my $beat = sub { - return unless $nowish >= $last_beat + 5; - $stats->(); - $last_beat = $nowish; - }; - my $stop = sub { - return unless $running; - $stats->(); - debug("done."); - $running = 0; - }; - # - my $sto = Mgd::get_store(); my $max_checked = 0; @@ -107,8 +81,6 @@ sub work { next; } $sto->delete_fid_from_file_to_queue($fid->id, FSCK_QUEUE); - $n_check++; - $beat->(); } }); } diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 1b99aa9c..7716dd25 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -1355,31 +1355,6 @@ sub cmd_replicate_now { return $self->ok_line({ count => int($rv) }); } -sub cmd_checker { - my MogileFS::Worker::Query $self = shift; - my $args = shift; - - my $new_setting; - if ($args->{disable}) { - $new_setting = 'off'; - } elsif ($args->{level}) { - # they want to turn it on or change the level, so let's ensure they - # specified a valid level - if (MogileFS::Worker::Checker::is_valid_level($args->{level})) { - $new_setting = $args->{level}; - } else { - return $self->err_line('invalid_checker_level'); - } - } - - if (defined $new_setting) { - MogileFS::Config->set_server_setting('fsck_enable', $new_setting); - return $self->ok_line; - } - - $self->err_line('failure'); -} - sub cmd_set_server_setting { my MogileFS::Worker::Query $self = shift; my $args = shift; diff --git a/mogilefsd b/mogilefsd index 5301a29e..8d00d344 100755 --- a/mogilefsd +++ b/mogilefsd @@ -73,9 +73,9 @@ See L. See L. -=item B -- background filesystem consistency checker +=item B -- background filesystem consistency checker -See L. +See L. 
=back From 3a0ab926aaa332cc4b3bb64e45832219bf3ed0b4 Mon Sep 17 00:00:00 2001 From: dormando Date: Tue, 6 Dec 2011 15:43:53 -0800 Subject: [PATCH 123/405] Revert part of f63f630a use'ing MogileFS::Server everywhere seems to occasionally cause dep implosion (like testing MogileFS::Network) Might need to put the Mgd:: aliases into a separate dependency and test it better :/ For now I've removed enough to make the tests work again. None of these files call anything other than ::log or ::error so they're safe enough. --- lib/MogileFS/Config.pm | 1 - lib/MogileFS/ProcManager.pm | 1 - lib/MogileFS/Util.pm | 1 - 3 files changed, 3 deletions(-) diff --git a/lib/MogileFS/Config.pm b/lib/MogileFS/Config.pm index 35ccc767..ec9978d3 100644 --- a/lib/MogileFS/Config.pm +++ b/lib/MogileFS/Config.pm @@ -5,7 +5,6 @@ use MogileFS::ProcManager; use Getopt::Long; use MogileFS::Store; use Sys::Hostname (); -use MogileFS::Server; our @ISA = qw(Exporter); our @EXPORT = qw($DEBUG config set_config FSCK_QUEUE REBAL_QUEUE); diff --git a/lib/MogileFS/ProcManager.pm b/lib/MogileFS/ProcManager.pm index dd0b389a..706b7162 100644 --- a/lib/MogileFS/ProcManager.pm +++ b/lib/MogileFS/ProcManager.pm @@ -4,7 +4,6 @@ use warnings; use POSIX qw(:sys_wait_h sigprocmask SIGINT SIG_BLOCK SIG_UNBLOCK); use Symbol; use Socket; -use MogileFS::Server; use MogileFS::Connection::Client; use MogileFS::Connection::Worker; use MogileFS::Util qw(apply_state_events); diff --git a/lib/MogileFS/Util.pm b/lib/MogileFS/Util.pm index 882d7e1a..e617b18c 100644 --- a/lib/MogileFS/Util.pm +++ b/lib/MogileFS/Util.pm @@ -2,7 +2,6 @@ package MogileFS::Util; use strict; use Carp qw(croak); use Time::HiRes; -use MogileFS::Server; use MogileFS::Exception; use MogileFS::DeviceState; From bc6ef9fee4c2f8c656f683a7034ad22de0d3d912 Mon Sep 17 00:00:00 2001 From: dormando Date: Tue, 6 Dec 2011 16:24:30 -0800 Subject: [PATCH 124/405] allow server setting to un-molest slave list in case you really want trackers in all DC's to talk to 
all slaves also, don't hide the slave connection error. --- lib/MogileFS/Store.pm | 4 +++- lib/MogileFS/Store/MySQL.pm | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 4b06ff51..6c5022b7 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -246,7 +246,9 @@ sub get_slave { # If we have no slaves, then return silently. return unless @slaves_list; - MogileFS::run_global_hook('slave_list_filter', \@slaves_list); + unless (MogileFS::Config->server_setting_cached('slave_skip_filtering') eq 'on') { + MogileFS::run_global_hook('slave_list_filter', \@slaves_list); + } my $dead_retry = MogileFS::Config->server_setting_cached('slave_dead_retry_timeout') || 15; diff --git a/lib/MogileFS/Store/MySQL.pm b/lib/MogileFS/Store/MySQL.pm index bb2229a1..73d68a02 100644 --- a/lib/MogileFS/Store/MySQL.pm +++ b/lib/MogileFS/Store/MySQL.pm @@ -94,7 +94,10 @@ sub check_slave { # TODO: Check show slave status *unless* a server setting is present to # tell us to ignore it (like in a multi-DC setup). eval { $self->{slave}->dbh }; - return 0 if $@; + if ($@) { + warn "Error while checking slave: $@"; + return 0; + } # call time() again here because SQL blocks. $$next_check = time() + 5; From f1122117a2be266bc951dd06cadebd389c2d90f5 Mon Sep 17 00:00:00 2001 From: dormando Date: Tue, 6 Dec 2011 17:08:41 -0800 Subject: [PATCH 125/405] ensure dead slaves get retried previous code would not retry unless a current slave died, list was empty, or config changed. 
--- lib/MogileFS/Store.pm | 51 ++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 6c5022b7..a70f49fd 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -210,7 +210,6 @@ sub _slaves_list { push @ret, [$dsn, $user, $pass] } - $self->{slave_list_cache} = \@ret; return @ret; } @@ -221,13 +220,43 @@ sub _pick_slave { return $self->{connected_slaves}->{$temp[0]}; } -sub get_slave { +sub _connect_slave { my $self = shift; + my $slave_fulldsn = shift; my $now = time(); + my $dead_retry = + MogileFS::Config->server_setting_cached('slave_dead_retry_timeout') || 15; + + my $dead_timeout = $self->{dead_slaves}->{$slave_fulldsn->[0]}; + return if (defined $dead_timeout && $dead_timeout + $dead_retry > $now); + return if ($self->{connected_slaves}->{$slave_fulldsn->[0]}); + + my $newslave = $self->{slave} = $self->new_from_dsn_user_pass(@$slave_fulldsn); + $self->{slave}->{next_check} = 0; + $newslave->mark_as_slave; + if ($self->check_slave) { + $self->{connected_slaves}->{$slave_fulldsn->[0]} = $newslave; + } else { + $self->{dead_slaves}->{$slave_fulldsn->[0]} = $now; + } +} + +sub get_slave { + my $self = shift; + die "Incapable of having slaves." 
unless $self->can_do_slaves; $self->{slave} = undef; + foreach my $slave (keys %{$self->{dead_slaves}}) { + my ($full_dsn) = grep { $slave eq $_->[0] } @{$self->{slave_list_cache}}; + unless ($full_dsn) { + delete $self->{dead_slaves}->{$slave}; + next; + } + $self->_connect_slave($full_dsn); + } + unless ($self->_slaves_list_changed) { if ($self->{slave} = $self->_pick_slave) { $self->{slave}->{recheck_req_gen} = $self->{recheck_req_gen}; @@ -237,7 +266,7 @@ sub get_slave { if ($self->{slave}) { my $dsn = $self->{slave}->{dsn}; - $self->{dead_slaves}->{$dsn} = $now; + $self->{dead_slaves}->{$dsn} = time(); delete $self->{connected_slaves}->{$dsn}; error("Error talking to slave: $dsn"); } @@ -250,22 +279,10 @@ sub get_slave { MogileFS::run_global_hook('slave_list_filter', \@slaves_list); } - my $dead_retry = - MogileFS::Config->server_setting_cached('slave_dead_retry_timeout') || 15; + $self->{slave_list_cache} = \@slaves_list; foreach my $slave_fulldsn (@slaves_list) { - my $dead_timeout = $self->{dead_slaves}->{$slave_fulldsn->[0]}; - next if (defined $dead_timeout && $dead_timeout + $dead_retry > $now); - next if ($self->{connected_slaves}->{$slave_fulldsn->[0]}); - - my $newslave = $self->{slave} = $self->new_from_dsn_user_pass(@$slave_fulldsn); - $self->{slave}->{next_check} = 0; - $newslave->mark_as_slave; - if ($self->check_slave) { - $self->{connected_slaves}->{$slave_fulldsn->[0]} = $newslave; - } else { - $self->{dead_slaves}->{$slave_fulldsn->[0]} = $now; - } + $self->_connect_slave($slave_fulldsn); } if ($self->{slave} = $self->_pick_slave) { From ddb1bc48c5f324e8b2c4065cd38e9636af394f5f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 29 Nov 2011 13:55:41 -0800 Subject: [PATCH 126/405] sqlite: consider SQLITE_BUSY and SQLITE_LOCKED deadlocks This appears to work for me and allows JobMaster to not die during fsck. 
--- lib/MogileFS/Store/SQLite.pm | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Store/SQLite.pm b/lib/MogileFS/Store/SQLite.pm index a66efb3c..984ebd48 100644 --- a/lib/MogileFS/Store/SQLite.pm +++ b/lib/MogileFS/Store/SQLite.pm @@ -56,9 +56,14 @@ sub column_type { # Store-related things we override # -------------------------------------------------------------------------- -# FIXME: Figure out how to properly detect this. +# from sqlite3.h: +use constant SQLITE_BUSY => 5; # The database file is locked +use constant SQLITE_LOCKED => 6; # A table in the database is locked + sub was_deadlock_error { - return 0; + my $err = $_[0]->dbh->err or return 0; + + ($err == SQLITE_BUSY || $err == SQLITE_LOCKED); } sub was_duplicate_error { From 7f17d49606e5e768fc3b1fb00c4886daba1785b5 Mon Sep 17 00:00:00 2001 From: Tomas Doran Date: Thu, 29 Dec 2011 11:35:11 +0000 Subject: [PATCH 127/405] Ping parent as needed to stop watchdog timeouts of monitor job. This fixes the watchdog repeatedly shooting the monitor job when you have a lot of devices and things are a little slow. --- CHANGES | 3 +++ lib/MogileFS/Worker/Monitor.pm | 2 ++ 2 files changed, 5 insertions(+) diff --git a/CHANGES b/CHANGES index d03029f9..ccb3122d 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,6 @@ + * Fixed issue where too many devices being active could cause the monitor job + to be timed out by the watchdog. 
+ 2011-11-14: Release version 2.55 * fixed sources in replication are now a suggestion (dormando ) diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index 37e9f3c7..bf2ec7d3 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -76,6 +76,8 @@ sub usage_refresh { $cur_iow->{$dev->id} = $self->{devutil}->{cur}->{$dev->id}; next if $self->{skip_host}{$dev->hostid}; $self->check_device($dev, $have_dbh) if $dev->dstate->should_monitor; + $self->still_alive; # Ping parent if needed so we don't time out + # given lots of devices. } $self->{devutil}->{prev} = $cur_iow; From 68ef57683f5418a46d7ffa4ce482733d820872aa Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 6 Jan 2012 09:33:05 +0000 Subject: [PATCH 128/405] fix "set_weight" command The sub we previously depended on was removed in commit ebf8a5a8dc9b4452671f7816b99583bf7934e9b1 --- lib/MogileFS/Worker/Query.pm | 2 +- t/10-weighting.t | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 7716dd25..689d1159 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -1313,7 +1313,7 @@ sub cmd_set_weight { return $self->err_line('host_mismatch') unless $dev->host->hostname eq $hostname; - $dev->set_weight($weight); + Mgd::get_store()->set_device_weight($dev->id, $weight); return $self->cmd_clear_cache; } diff --git a/t/10-weighting.t b/t/10-weighting.t index a97b9287..b0057548 100644 --- a/t/10-weighting.t +++ b/t/10-weighting.t @@ -27,7 +27,7 @@ find_mogclient_or_skip(); my $sto = eval { temp_store(); }; if ($sto) { - plan tests => 17; + plan tests => 19; } else { plan skip_all => "Can't create temporary test database: $@"; exit 0; @@ -76,6 +76,10 @@ ok($tmptrack->mogadm("host", "add", "hostB", "--ip=127.0.1.2", "--status=alive") ok($tmptrack->mogadm("device", "add", "hostA", 1), "created dev1 on hostA"); ok($tmptrack->mogadm("device", "add", "hostB", 2), 
"created dev2 on hostB"); +# just ensure the "set_weight" command doesn't ERR out +ok($tmptrack->mogadm("device", "modify", "hostA", 1, "--weight=50"), "set dev1 weight=50 on hostA"); +ok($tmptrack->mogadm("device", "modify", "hostB", 2, "--weight=50"), "set dev2 weight=50 on hostB"); + # wait for monitor { my $was = $be->{timeout}; # can't use local on phash :( From 315ea1fee68c0281ee0005402e2f5dd4a180744a Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 16 Jan 2012 20:53:00 -0800 Subject: [PATCH 129/405] fix potential hang in JobMaster $dbh->err gets reset after every request, so when I added the queue lock/unlock I think it masked the deadlock errors. Which means transactions would occasionally start then never finish. Rewired the error detection to mostly work, then added a catchall at the bottom just in case. needs more work in the future. --- lib/MogileFS/Store.pm | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index a70f49fd..cfbea32c 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1663,12 +1663,18 @@ sub grab_queue_chunk { $dbh->do("UPDATE $queue SET nexttry = $ut + 1000 WHERE fid IN ($fidlist)"); $dbh->commit; }; - $self->unlock_queue($queue); if ($self->was_deadlock_error) { eval { $dbh->rollback }; - return (); + $work = undef; + } else { + $self->condthrow; } - $self->condthrow; + # FIXME: Super extra paranoia to prevent deadlocking. + # Need to handle or die on all errors above, but $@ can get reset. For now + # we'll just always ensure there's no transaction running at the end here. + # A (near) release should figure the error detection correctly. + if ($dbh->{AutoCommit} == 0) { eval { $dbh->rollback }; } + $self->unlock_queue($queue); return defined $work ? 
values %$work : (); } From dcdd152d002d6be813816ab380af9d99477a59f3 Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 27 Jan 2012 17:56:51 -0800 Subject: [PATCH 130/405] Add connect timeouts This should work with the primarily supported databases. Mainly this is for slave setups, so slaves which are broken but not immediately RST'ing or error'ing get backed off from. You'll still get some watchdog hits when a slave first dies, but it'll handle it properly from there with minimal delays. --- lib/MogileFS/Store.pm | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index cfbea32c..b5ae44d1 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -56,6 +56,8 @@ sub new_from_dsn_user_pass { handles_left => 0, # amount of times this handle can still be verified connected_slaves => {}, dead_slaves => {}, + dead_backoff => {}, # how many times in a row a slave has died + connect_timeout => 30, # High default. 
}, $subclass; $self->init; return $self; @@ -152,10 +154,14 @@ sub raise_errors { $self->dbh->{RaiseError} = 1; } +sub set_connect_timeout { $_[0]{connect_timeout} = $_[1]; } + sub dsn { $_[0]{dsn} } sub user { $_[0]{user} } sub pass { $_[0]{pass} } +sub connect_timeout { $_[0]{connect_timeout} } + sub init { 1 } sub post_dbi_connect { 1 } @@ -228,17 +234,25 @@ sub _connect_slave { my $dead_retry = MogileFS::Config->server_setting_cached('slave_dead_retry_timeout') || 15; + my $dead_backoff = $self->{dead_backoff}->{$slave_fulldsn->[0]} || 0; my $dead_timeout = $self->{dead_slaves}->{$slave_fulldsn->[0]}; - return if (defined $dead_timeout && $dead_timeout + $dead_retry > $now); + return if (defined $dead_timeout + && $dead_timeout + ($dead_retry * $dead_backoff) > $now); return if ($self->{connected_slaves}->{$slave_fulldsn->[0]}); my $newslave = $self->{slave} = $self->new_from_dsn_user_pass(@$slave_fulldsn); + $newslave->set_connect_timeout( + MogileFS::Config->server_setting_cached('slave_connect_timeout') || 1); $self->{slave}->{next_check} = 0; $newslave->mark_as_slave; if ($self->check_slave) { $self->{connected_slaves}->{$slave_fulldsn->[0]} = $newslave; + $self->{dead_backoff}->{$slave_fulldsn->[0]} = 0; } else { + # Magic numbers are saddening... 
+ $dead_backoff++ unless $dead_backoff > 20; $self->{dead_slaves}->{$slave_fulldsn->[0]} = $now; + $self->{dead_backoff}->{$slave_fulldsn->[0]} = $dead_backoff; } } @@ -267,6 +281,7 @@ sub get_slave { if ($self->{slave}) { my $dsn = $self->{slave}->{dsn}; $self->{dead_slaves}->{$dsn} = time(); + $self->{dead_backoff}->{$dsn} = 0; delete $self->{connected_slaves}->{$dsn}; error("Error talking to slave: $dsn"); } @@ -336,13 +351,22 @@ sub dbh { return $self->{dbh} if $self->{dbh}; } - $self->{dbh} = DBI->connect($self->{dsn}, $self->{user}, $self->{pass}, { - PrintError => 0, - AutoCommit => 1, - # FUTURE: will default to on (have to validate all callers first): - RaiseError => ($self->{raise_errors} || 0), - }) or + eval { + local $SIG{ALRM} = sub { die "timeout\n" }; + alarm($self->connect_timeout); + $self->{dbh} = DBI->connect($self->{dsn}, $self->{user}, $self->{pass}, { + PrintError => 0, + AutoCommit => 1, + # FUTURE: will default to on (have to validate all callers first): + RaiseError => ($self->{raise_errors} || 0), + }); + }; + alarm(0); + if ($@ eq "timeout\n") { + die "Failed to connect to database: timeout"; + } elsif ($@) { die "Failed to connect to database: " . DBI->errstr; + } $self->post_dbi_connect; $self->{handles_left} = $self->{max_handles} if $self->{max_handles}; return $self->{dbh}; From 18c419580667c76bba35bce413db38242566d7f1 Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 27 Jan 2012 18:03:54 -0800 Subject: [PATCH 131/405] Checking in changes prior to tagging of version 2.56. 
Changelog diff is: diff --git a/CHANGES b/CHANGES index ccb3122..6423acc 100644 --- a/CHANGES +++ b/CHANGES @@ -1,5 +1,33 @@ +2012-01-27: Release version 2.56 + + * Add database connect timeouts (dormando ) + + * fix potential hang in JobMaster added in 2.55 (dormando ) + + * fix "set_weight" command (Eric Wong ) + + * sqlite: consider SQLITE_BUSY and SQLITE_LOCKED deadlocks (Eric Wong ) + + * kill a little more dead fsck/Checker code (dormando ) + + * Destroy mogdeps (dormando ) + + * improve slave handling code (dormando ) + + * don't spew errors if the master goes down (dormando ) + + * provide an async server setting cache via monitor worker (dormando ) + + * fix return value of commands that clear_cache (Eric Wong ) + + * make reaper wait less time in test mode (dormando ) + + * queryworker: reimplement "clear_caches" command (Eric Wong ) + + * worker: use timeout with read_from_parent() instead of sleep (Eric Wong ) + * Fixed issue where too many devices being active could cause the monitor job - to be timed out by the watchdog. + to be timed out by the watchdog. 
(Tomas Doran) 2011-11-14: Release version 2.55 --- CHANGES | 30 +++++++++++++++++++++++++++++- lib/MogileFS/Server.pm | 2 +- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index ccb3122d..6423acc1 100644 --- a/CHANGES +++ b/CHANGES @@ -1,5 +1,33 @@ +2012-01-27: Release version 2.56 + + * Add database connect timeouts (dormando ) + + * fix potential hang in JobMaster added in 2.55 (dormando ) + + * fix "set_weight" command (Eric Wong ) + + * sqlite: consider SQLITE_BUSY and SQLITE_LOCKED deadlocks (Eric Wong ) + + * kill a little more dead fsck/Checker code (dormando ) + + * Destroy mogdeps (dormando ) + + * improve slave handling code (dormando ) + + * don't spew errors if the master goes down (dormando ) + + * provide an async server setting cache via monitor worker (dormando ) + + * fix return value of commands that clear_cache (Eric Wong ) + + * make reaper wait less time in test mode (dormando ) + + * queryworker: reimplement "clear_caches" command (Eric Wong ) + + * worker: use timeout with read_from_parent() instead of sleep (Eric Wong ) + * Fixed issue where too many devices being active could cause the monitor job - to be timed out by the watchdog. + to be timed out by the watchdog. (Tomas Doran) 2011-11-14: Release version 2.55 diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 4841407f..f49a9702 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.55"; +$VERSION = "2.56"; =head1 NAME From f46faa0684e3dd75ae19dbc3ae41722030b96be2 Mon Sep 17 00:00:00 2001 From: Jonathan Steinert Date: Sun, 15 Jan 2012 04:30:17 -0800 Subject: [PATCH 132/405] Need a hook to be able to check the slave list during startup. 
--- lib/MogileFS/Store.pm | 32 +++++++++++++++++++++++++++++++- lib/MogileFS/Store/MySQL.pm | 1 + 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index b5ae44d1..1d4ef922 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -2043,7 +2043,37 @@ sub fsck_evcode_counts { # run before daemonizing. you can die from here if you see something's amiss. or emit # warnings. -sub pre_daemonize_checks { } +sub pre_daemonize_checks { + my $self = shift; + + $self->pre_daemonize_check_slaves; +} + +sub pre_daemonize_check_slaves { + my $sk = MogileFS::Config->server_setting('slave_keys') + or return; + + my @slaves; + foreach my $key (split /\s*,\s*/, $sk) { + my $slave = MogileFS::Config->server_setting("slave_$key"); + + if (!$slave) { + error("key for slave DB config: slave_$key not found in configuration"); + next; + } + + my ($dsn, $user, $pass) = split /\|/, $slave; + if (!defined($dsn) or !defined($user) or !defined($pass)) { + error("key slave_$key contains $slave, which doesn't split in | into DSN|user|pass - ignoring"); + next; + } + push @slaves, [$dsn, $user, $pass] + } + + return unless @slaves; # Escape this block if we don't have a set of slaves anyways + + MogileFS::run_global_hook('slave_list_check', \@slaves); +} # attempt to grab a lock of lockname, and timeout after timeout seconds. diff --git a/lib/MogileFS/Store/MySQL.pm b/lib/MogileFS/Store/MySQL.pm index 73d68a02..c0d810d7 100644 --- a/lib/MogileFS/Store/MySQL.pm +++ b/lib/MogileFS/Store/MySQL.pm @@ -442,6 +442,7 @@ sub pre_daemonize_checks { die "MySQL self-tests failed. 
Your DBD::mysql might've been built against an old DBI version.\n"; } + return $self->SUPER::pre_daemonize_checks(); } 1; From 9ca00e7f49b03106fed3bead29095a2b79b303e0 Mon Sep 17 00:00:00 2001 From: Jonathan Steinert Date: Tue, 31 Jan 2012 14:27:19 -0800 Subject: [PATCH 133/405] Silence a warning about undefined value introduced by the slave database ordering work --- lib/MogileFS/Store.pm | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 1d4ef922..dc78c2bb 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -290,7 +290,9 @@ sub get_slave { # If we have no slaves, then return silently. return unless @slaves_list; - unless (MogileFS::Config->server_setting_cached('slave_skip_filtering') eq 'on') { + my $slave_skip_filtering = MogileFS::Config->server_setting('slave_skip_filtering'); + + unless (defined $slave_skip_filtering && $slave_skip_filtering eq 'on') { MogileFS::run_global_hook('slave_list_filter', \@slaves_list); } From 4ede37c6558b441fb554848ed733391a199c9c24 Mon Sep 17 00:00:00 2001 From: Jonathan Steinert Date: Tue, 31 Jan 2012 15:40:39 -0800 Subject: [PATCH 134/405] CHANGES entries for last two commits --- CHANGES | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGES b/CHANGES index 6423acc1..10ecf03a 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,7 @@ + * Silence a warning about undefined value introduced by the slave database ordering work (hachi@kuiki.net) + + * Need a hook to be able to check the slave list during startup (hachi@kuiki.net) + 2012-01-27: Release version 2.56 * Add database connect timeouts (dormando ) From 151708669ee85b64233341ed3393724a058a1aba Mon Sep 17 00:00:00 2001 From: dormando Date: Tue, 31 Jan 2012 16:27:56 -0800 Subject: [PATCH 135/405] Checking in changes prior to tagging of version 2.57. 
Changelog diff is: diff --git a/CHANGES b/CHANGES index 10ecf03..46306bc 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,5 @@ +2012-01-31: Release version 2.57 + * Silence a warning about undefined value introduced by the slave database ordering work (hachi@kuiki.net) * Need a hook to be able to check the slave list during startup (hachi@kuiki.net) --- CHANGES | 2 ++ lib/MogileFS/Server.pm | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 10ecf03a..46306bc8 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,5 @@ +2012-01-31: Release version 2.57 + * Silence a warning about undefined value introduced by the slave database ordering work (hachi@kuiki.net) * Need a hook to be able to check the slave list during startup (hachi@kuiki.net) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index f49a9702..9dd022e0 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.56"; +$VERSION = "2.57"; =head1 NAME From 400307d4084da9efdac71f9b375d8d0983dd1d52 Mon Sep 17 00:00:00 2001 From: dormando Date: Thu, 23 Feb 2012 00:32:55 -0800 Subject: [PATCH 136/405] fix "fsck never stops" issue also probably an issue with rebalance. was losing the key for remove events on server_settings --- lib/MogileFS/Util.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Util.pm b/lib/MogileFS/Util.pm index e617b18c..10eacfa8 100644 --- a/lib/MogileFS/Util.pm +++ b/lib/MogileFS/Util.pm @@ -37,7 +37,7 @@ sub apply_state_events { # This special case feels gross, but that's what it is. if ($type eq 'srvset') { my $val = $mode eq 'set' ? 
$args->{value} : undef; - MogileFS::Config->cache_server_setting($args->{field}, $val); + MogileFS::Config->cache_server_setting($id, $val); next; } From 0d83311d52045954e0711dc831fdb5ec27a1da0e Mon Sep 17 00:00:00 2001 From: dormando Date: Sun, 26 Feb 2012 23:42:27 -0800 Subject: [PATCH 137/405] use cached setting for slave list also switches to "confess" for DB connect failures. otherwise impossible to tell which call tried to access a DB it shouldn't be talking to. --- lib/MogileFS/Store.pm | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index dc78c2bb..d780de9e 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1,7 +1,7 @@ package MogileFS::Store; use strict; use warnings; -use Carp qw(croak); +use Carp qw(croak confess); use MogileFS::Util qw(throw max error); use DBI; # no reason a Store has to be DBI-based, but for now they all are. use List::Util qw(shuffle); @@ -57,7 +57,7 @@ sub new_from_dsn_user_pass { connected_slaves => {}, dead_slaves => {}, dead_backoff => {}, # how many times in a row a slave has died - connect_timeout => 30, # High default. + connect_timeout => 10, # High default. }, $subclass; $self->init; return $self; @@ -290,7 +290,7 @@ sub get_slave { # If we have no slaves, then return silently. return unless @slaves_list; - my $slave_skip_filtering = MogileFS::Config->server_setting('slave_skip_filtering'); + my $slave_skip_filtering = MogileFS::Config->server_setting_cached('slave_skip_filtering'); unless (defined $slave_skip_filtering && $slave_skip_filtering eq 'on') { MogileFS::run_global_hook('slave_list_filter', \@slaves_list); @@ -365,9 +365,9 @@ sub dbh { }; alarm(0); if ($@ eq "timeout\n") { - die "Failed to connect to database: timeout"; + confess "Failed to connect to database: timeout"; } elsif ($@) { - die "Failed to connect to database: " . DBI->errstr; + confess "Failed to connect to database: " . 
DBI->errstr; } $self->post_dbi_connect; $self->{handles_left} = $self->{max_handles} if $self->{max_handles}; From d58c1b906c1b6722270468fe956eb76edbbc2499 Mon Sep 17 00:00:00 2001 From: dormando Date: Sun, 26 Feb 2012 23:43:27 -0800 Subject: [PATCH 138/405] use DB server_settings if none are cached yet allows startup code to call "server_settings_cached" without having to wait for the monitor worker to run once. --- lib/MogileFS/Config.pm | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/MogileFS/Config.pm b/lib/MogileFS/Config.pm index ec9978d3..1a16cbca 100644 --- a/lib/MogileFS/Config.pm +++ b/lib/MogileFS/Config.pm @@ -21,6 +21,7 @@ use constant DEVICE_SUMMARY_CACHE_TIMEOUT => 15; my %conf; my %server_settings; +my $has_cached_settings = 0; sub set_config { shift if @_ == 3; my ($k, $v) = @_; @@ -271,6 +272,7 @@ sub server_setting { sub cache_server_setting { my ($class, $key, $val) = @_; + $has_cached_settings++ unless $has_cached_settings; if (! defined $val) { delete $server_settings{$key} if exists $server_settings{$key}; @@ -280,6 +282,9 @@ sub cache_server_setting { sub server_setting_cached { my ($class, $key) = @_; + unless ($has_cached_settings) { + return MogileFS::Config->server_setting($key); + } return $server_settings{$key}; } From 1597f84c155d63ba8a6d7309d013d6f418925592 Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 27 Feb 2012 01:39:30 -0800 Subject: [PATCH 139/405] Back off if master DB is down If monitor worker figures master DB is down, send a flag telling workers to avoid attempting to connect to the master. If a worker is already connected and it functions, it will ignore the flag. If a worker is starting up or reconnecting to the master, it will avoid the attempt. After this commit a timed-out master DB will finally not cause the tracker to fail, and reads will continue to work. Takes almost a minute to notice the master is gone though. some room for improvement there. 
--- lib/MogileFS/Config.pm | 5 +++-- lib/MogileFS/Store.pm | 16 ++++++++++++---- lib/MogileFS/Worker/Monitor.pm | 22 ++++++++++++++++++---- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/lib/MogileFS/Config.pm b/lib/MogileFS/Config.pm index 1a16cbca..1857b46e 100644 --- a/lib/MogileFS/Config.pm +++ b/lib/MogileFS/Config.pm @@ -281,8 +281,9 @@ sub cache_server_setting { } sub server_setting_cached { - my ($class, $key) = @_; - unless ($has_cached_settings) { + my ($class, $key, $fallback) = @_; + $fallback = 1 unless (defined $fallback); + if (!$has_cached_settings && $fallback) { return MogileFS::Config->server_setting($key); } return $server_settings{$key}; diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index d780de9e..68ac9784 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -171,12 +171,12 @@ sub mark_as_slave { my $self = shift; die "Incapable of becoming slave." unless $self->can_do_slaves; - $self->{slave} = 1; + $self->{is_slave} = 1; } sub is_slave { my $self = shift; - return $self->{slave}; + return $self->{is_slave}; } sub _slaves_list_changed { @@ -353,6 +353,14 @@ sub dbh { return $self->{dbh} if $self->{dbh}; } + # Shortcut flag: if monitor thinks the master is down, avoid attempting to + # connect to it for now. If we already have a connection to the master, + # keep using it as above. + if (!$self->is_slave) { + my $flag = MogileFS::Config->server_setting_cached('_master_db_alive', 0); + return if (defined $flag && $flag == 0);; + } + eval { local $SIG{ALRM} = sub { die "timeout\n" }; alarm($self->connect_timeout); @@ -365,9 +373,9 @@ sub dbh { }; alarm(0); if ($@ eq "timeout\n") { - confess "Failed to connect to database: timeout"; + die "Failed to connect to database: timeout"; } elsif ($@) { - confess "Failed to connect to database: " . DBI->errstr; + die "Failed to connect to database: " . 
DBI->errstr; } $self->post_dbi_connect; $self->{handles_left} = $self->{max_handles} if $self->{max_handles}; diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index bf2ec7d3..5e2c6912 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -13,6 +13,7 @@ use fields ( 'prev_data', # DB data from previous run 'devutil', # Running tally of device utilization 'events', # Queue of state events + 'have_masterdb', # Hint flag for if the master DB is available ); use Danga::Socket 1.56; @@ -35,6 +36,7 @@ sub new { device => {} }; $self->{devutil} = { cur => {}, prev => {} }; $self->{events} = []; + $self->{have_masterdb} = 0; return $self; } @@ -46,12 +48,24 @@ sub cache_refresh { my $self = shift; debug("Monitor running; checking DB for updates"); - return unless $self->validate_dbh; + # "Fix" our local cache of this flag, so we always check the master DB. + MogileFS::Config->cache_server_setting('_master_db_alive', 1); + my $have_dbh = $self->validate_dbh; + if ($have_dbh && !$self->{have_masterdb}) { + $self->{have_masterdb} = 1; + $self->set_event('srvset', '_master_db_alive', { value => 1 }); + } elsif (!$have_dbh) { + $self->{have_masterdb} = 0; + $self->set_event('srvset', '_master_db_alive', { value => 0 }); + error("Cannot connect to master database!"); + } - my $db_data = $self->grab_all_data; + if ($have_dbh) { + my $db_data = $self->grab_all_data; - # Stack diffs to ship back later - $self->diff_data($db_data); + # Stack diffs to ship back later + $self->diff_data($db_data); + } $self->send_events_to_parent; } From ebe020f2c458273b33a13ae1e133c31e775bf1ee Mon Sep 17 00:00:00 2001 From: Pyry Hakulinen Date: Thu, 16 Feb 2012 08:57:03 +0000 Subject: [PATCH 140/405] New variable that disables devcount updates Devcount field is only useful for statistics and once you have enough files it's not that useful even for that. 
Downing a device generates less I/O load on the DB when devcount updates are disabled, there aren't any queries against the file table. Correct devcount can be restored by disabling the flag and running fsck. --- lib/MogileFS/Config.pm | 1 + lib/MogileFS/FID.pm | 4 +++- lib/MogileFS/Worker/Fsck.pm | 4 ++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/MogileFS/Config.pm b/lib/MogileFS/Config.pm index 1857b46e..f7ea81ee 100644 --- a/lib/MogileFS/Config.pm +++ b/lib/MogileFS/Config.pm @@ -362,6 +362,7 @@ sub server_setting_is_writable { # let slave settings go through unmodified, for now. if ($key =~ /^slave_/) { return $del_if_blank }; if ($key eq "enable_rebalance") { return $bool }; + if ($key eq "skip_devcount") { return $bool }; if ($key eq "memcache_servers") { return $any }; if ($key eq "internal_queue_limit") { return $num }; diff --git a/lib/MogileFS/FID.pm b/lib/MogileFS/FID.pm index 0012582b..3e3cc92a 100644 --- a/lib/MogileFS/FID.pm +++ b/lib/MogileFS/FID.pm @@ -118,6 +118,8 @@ sub update_devcount { my $no_lock = delete $opts{no_lock}; croak "Bogus options" if %opts; + return 1 if MogileFS::Config->server_setting_cached('skip_devcount'); + my $fidid = $self->{fidid}; my $sto = Mgd::get_store(); @@ -221,7 +223,7 @@ sub devids_meet_policy { my @devs = $self->devs; # This is a little heavy handed just to fix the 'devcount' cache, but # doing it here ensures we get the error logged. 
- if (@devs != $self->devcount) { + unless (MogileFS::Config->server_setting_cached('skip_devcount') || @devs == $self->devcount) { return 0; } diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index 5fb4e24f..6a4bef85 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -127,7 +127,7 @@ sub check_fid { } # This is a simple fixup case - unless (scalar($fid->devids) == $fid->devcount) { + unless (MogileFS::Config->server_setting_cached('skip_devcount') || scalar($fid->devids) == $fid->devcount) { # log a bad count $fid->fsck_log(EV_BAD_COUNT); @@ -291,7 +291,7 @@ sub fix_fid { } # Clean up the device count if it's wrong - unless(scalar($fid->devids) == $fid->devcount) { + unless(MogileFS->config('skip_devcount') || scalar($fid->devids) == $fid->devcount) { $fid->update_devcount(); $fid->fsck_log(EV_BAD_COUNT); } From 6083d9d6ed94751b75f36912a144d68801eb1ff3 Mon Sep 17 00:00:00 2001 From: Pyry Hakulinen Date: Thu, 16 Feb 2012 08:57:06 +0000 Subject: [PATCH 141/405] Some storage servers can create full upload path during upload New setting that allows admins to skip MKCOLs against storage nodes. Some servers (nginx) can create a full directory tree when a PUT request is made. This will make cmd_create_open a little bit faster because trackers do not need to connect to storage nodes.
Needs this in nginx: create_full_put_path on; --- lib/MogileFS/Config.pm | 1 + lib/MogileFS/Device.pm | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Config.pm b/lib/MogileFS/Config.pm index f7ea81ee..60cb10f0 100644 --- a/lib/MogileFS/Config.pm +++ b/lib/MogileFS/Config.pm @@ -363,6 +363,7 @@ sub server_setting_is_writable { if ($key =~ /^slave_/) { return $del_if_blank }; if ($key eq "enable_rebalance") { return $bool }; if ($key eq "skip_devcount") { return $bool }; + if ($key eq "skip_mkcol") { return $bool }; if ($key eq "memcache_servers") { return $any }; if ($key eq "internal_queue_limit") { return $num }; diff --git a/lib/MogileFS/Device.pm b/lib/MogileFS/Device.pm index b559fc33..2740d68b 100644 --- a/lib/MogileFS/Device.pm +++ b/lib/MogileFS/Device.pm @@ -173,7 +173,7 @@ my $dir_made_lastclean = 0; # returns 1 on success, 0 on failure sub create_directory { my ($self, $uri) = @_; - return 1 if $self->doesnt_know_mkcol; + return 1 if $self->doesnt_know_mkcol || MogileFS::Config->server_setting_cached('skip_mkcol'); # rfc2518 says we "should" use a trailing slash. Some servers # (nginx) appears to require it. From 54bb4f932726b0f5c96f2ff8079f51e8c076d985 Mon Sep 17 00:00:00 2001 From: Pyry Hakulinen Date: Thu, 16 Feb 2012 08:57:10 +0000 Subject: [PATCH 142/405] Filter out illegal memcache servers from memcache_servers If you set incorrect memcache servers using mogadm: mogadm settings set memcache_servers failure Tracker query workers will start dying with: Use of uninitialized value $ip in hash element at /usr/share/perl5/Cache/Memcached.pm line 253. Use of uninitialized value in subroutine entry at /usr/share/perl5/Cache/Memcached.pm line 288. Error: Bad arg length for Socket::pack_sockaddr_in, length is 0, should be 4 at /usr/lib/perl/5.10/Socket.pm line 214. 
[Mon Jan 30 14:23:24 2012] [queryworker(29561)] Error running command 'get_paths': Bad arg length for Socket::pack_sockaddr_in, length is 0, should be 4 at /usr/lib/perl/5.10/Socket.pm line 214. --- lib/MogileFS/Config.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Config.pm b/lib/MogileFS/Config.pm index 60cb10f0..8735e3b7 100644 --- a/lib/MogileFS/Config.pm +++ b/lib/MogileFS/Config.pm @@ -300,7 +300,7 @@ sub memcache_client { my $now = time(); return $memc if $last_memc_server_fetch > $now - 30; - my @servers = split(/\s*,\s*/, MogileFS::Config->server_setting_cached("memcache_servers") || ""); + my @servers = grep(/:\d+$/, split(/\s*,\s*/, MogileFS::Config->server_setting_cached("memcache_servers") || "")); $memc->set_servers(\@servers); $last_memc_server_fetch = $now; From d9368ac7bce1a2908ead50d9aa9f980f2a107131 Mon Sep 17 00:00:00 2001 From: Pyry Hakulinen Date: Thu, 16 Feb 2012 08:57:13 +0000 Subject: [PATCH 143/405] Unset memc when memcache servers have been removed This allows one to get the client and see if memcache is being used or not e.g: my $memc = MogileFS::Config->memcache_client; if ($memc) { ..dostuff.. 
} --- lib/MogileFS/Config.pm | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Config.pm b/lib/MogileFS/Config.pm index 8735e3b7..11b6e6b0 100644 --- a/lib/MogileFS/Config.pm +++ b/lib/MogileFS/Config.pm @@ -294,16 +294,19 @@ my $last_memc_server_fetch = 0; my $have_memc_module = eval "use Cache::Memcached; 1;"; sub memcache_client { return undef unless $have_memc_module; - $memc ||= Cache::Memcached->new; # only reload the server list every 30 seconds my $now = time(); return $memc if $last_memc_server_fetch > $now - 30; my @servers = grep(/:\d+$/, split(/\s*,\s*/, MogileFS::Config->server_setting_cached("memcache_servers") || "")); - $memc->set_servers(\@servers); $last_memc_server_fetch = $now; + return ($memc = undef) unless @servers; + + $memc ||= Cache::Memcached->new; + $memc->set_servers(\@servers); + return $memc; } From 42f4a903f2ed6c15cd9ebf955f14cf4e0e7264e6 Mon Sep 17 00:00:00 2001 From: Pyry Hakulinen Date: Thu, 16 Feb 2012 08:57:16 +0000 Subject: [PATCH 144/405] Remove FID from memcache when it is deleted Removes domain-dkey -> FID mapping from memcache when a file is deleted. Memcached LRU will take care of FID -> devid mapping. 
--- lib/MogileFS/FID.pm | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/MogileFS/FID.pm b/lib/MogileFS/FID.pm index 3e3cc92a..5ad6a414 100644 --- a/lib/MogileFS/FID.pm +++ b/lib/MogileFS/FID.pm @@ -160,7 +160,14 @@ sub mark_unreachable { sub delete { my $fid = shift; my $sto = Mgd::get_store(); + my $memc = MogileFS::Config->memcache_client; + if ($memc) { + $fid->_tryload; + } $sto->delete_fidid($fid->id); + if ($memc && $fid->{_loaded}) { + $memc->delete("mogfid:$fid->{dmid}:$fid->{dkey}"); + } } # returns 1 on success, 0 on duplicate key error, dies on exception From 5093595374a93cc2f58d4dfe0ed91554a44e723c Mon Sep 17 00:00:00 2001 From: Pyry Hakulinen Date: Thu, 16 Feb 2012 08:57:19 +0000 Subject: [PATCH 145/405] Populate memcache even when noverify is missing Because memcache is faster than DB, it makes sense to try to populate memcache every time we fetch the values from database. This also adds a configurable TTL for the cache, making it possible to cache the values for much longer, thus making caching actually useful. 
--- lib/MogileFS/Config.pm | 1 + lib/MogileFS/Worker/Query.pm | 15 ++++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/lib/MogileFS/Config.pm b/lib/MogileFS/Config.pm index 11b6e6b0..f5c67faa 100644 --- a/lib/MogileFS/Config.pm +++ b/lib/MogileFS/Config.pm @@ -368,6 +368,7 @@ sub server_setting_is_writable { if ($key eq "skip_devcount") { return $bool }; if ($key eq "skip_mkcol") { return $bool }; if ($key eq "memcache_servers") { return $any }; + if ($key eq "memcache_ttl") { return $num }; if ($key eq "internal_queue_limit") { return $num }; # ReplicationPolicy::MultipleNetworks diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 689d1159..2f609ae9 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -1003,13 +1003,14 @@ sub cmd_get_paths { my $args = shift; # memcache mappings are as follows: - # mogfid:: -> fidid (and TODO: invalidate this when key is replaced) + # mogfid:: -> fidid # mogdevids: -> \@devids (and TODO: invalidate when the replication or deletion is run!) # if you specify 'noverify', that means a correct answer isn't needed and memcache can # be used. - my $use_memc = $args->{noverify}; - my $memc = $use_memc ? MogileFS::Config->memcache_client : undef; + my $memc = MogileFS::Config->memcache_client; + my $get_from_memc = $memc && $args->{noverify}; + my $memcache_ttl = MogileFS::Config->server_setting_cached("memcache_ttl") || 3600; # validate domain for plugins $args->{dmid} = $self->check_domain($args) @@ -1033,7 +1034,7 @@ sub cmd_get_paths { my $fid; my $need_fid_in_memcache = 0; my $mogfid_memkey = "mogfid:$args->{dmid}:$key"; - if ($memc) { + if ($get_from_memc) { if (my $fidid = $memc->get($mogfid_memkey)) { $fid = MogileFS::FID->new($fidid); } else { @@ -1048,7 +1049,7 @@ sub cmd_get_paths { } # add to memcache, if needed. for an hour. 
- $memc->add($mogfid_memkey, $fid->id, 3600) if $need_fid_in_memcache; + $memc->set($mogfid_memkey, $fid->id, $memcache_ttl ) if $need_fid_in_memcache || ($memc && !$get_from_memc); my $dmap = Mgd::device_factory()->map_by_id; @@ -1060,7 +1061,7 @@ sub cmd_get_paths { my @fid_devids; my $need_devids_in_memcache = 0; my $devid_memkey = "mogdevids:" . $fid->id; - if ($memc) { + if ($get_from_memc) { if (my $list = $memc->get($devid_memkey)) { @fid_devids = @$list; } else { @@ -1071,7 +1072,7 @@ sub cmd_get_paths { Mgd::get_store()->slaves_ok(sub { @fid_devids = $fid->devids; }); - $memc->add($devid_memkey, \@fid_devids, 3600) if $need_devids_in_memcache; + $memc->set($devid_memkey, \@fid_devids, $memcache_ttl ) if $need_devids_in_memcache || ($memc && !$get_from_memc); } my @devices = map { $dmap->{$_} } @fid_devids; From 2b4122081241927b617b1b91ff16c2e425c81147 Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 27 Feb 2012 02:17:23 -0800 Subject: [PATCH 146/405] fix skip_devcount check from Pyry's commits --- lib/MogileFS/Worker/Fsck.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index 6a4bef85..f070a363 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -291,7 +291,7 @@ sub fix_fid { } # Clean up the device count if it's wrong - unless(MogileFS->config('skip_devcount') || scalar($fid->devids) == $fid->devcount) { + unless(MogileFS::Config->server_setting_cached('skip_devcount') || scalar($fid->devids) == $fid->devcount) { $fid->update_devcount(); $fid->fsck_log(EV_BAD_COUNT); } From 91623de3f6d34688b7114ace45ff3e703f0e9f5b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 6 Dec 2011 14:11:04 -0800 Subject: [PATCH 147/405] enable TCP keepalives for accepted sockets Some clients may remain connected and idle for hours/days at a time. 
If their connections drop due to catastrophic failure and the TCP implementation is unable to notify the server, the server could eventually run out of file descriptors and/or memory for new clients. This change allows castastrophic network failures to be detected after roughly 2 hours on most setups. Sysadmins are free to tweak the TCP keepalive knobs in sysctl or similar. --- lib/MogileFS/Server.pm | 3 ++- lib/Mogstored/SideChannelListener.pm | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 9dd022e0..ee570723 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -27,7 +27,7 @@ use Time::HiRes (); use Net::Netmask; use LWP::UserAgent; use List::Util; -use Socket (); +use Socket qw(SO_KEEPALIVE); use MogileFS::Util qw(daemonize); use MogileFS::Sys; @@ -130,6 +130,7 @@ sub run { Reuse => 1, Listen => 1024 ) or die "Error creating socket: $@\n"; + $server->sockopt(SO_KEEPALIVE, 1); # save sub to accept a client push @servers, $server; diff --git a/lib/Mogstored/SideChannelListener.pm b/lib/Mogstored/SideChannelListener.pm index 0821951b..355662a7 100644 --- a/lib/Mogstored/SideChannelListener.pm +++ b/lib/Mogstored/SideChannelListener.pm @@ -2,6 +2,7 @@ package Mogstored::SideChannelListener; use strict; use base 'Perlbal::TCPListener'; use Mogstored::SideChannelClient; +use Socket qw(SO_KEEPALIVE); sub new { my ($class, $hostport) = @_; @@ -9,7 +10,9 @@ sub new { # exploding/warning. so we created this stub service above in our static # config, just for this. 
my $svc = Perlbal->service("mgmt") or die "Where is mgmt service?"; - return $class->SUPER::new($hostport, $svc); + my $self = $class->SUPER::new($hostport, $svc); + $self->{sock}->sockopt(SO_KEEPALIVE, 1); + return $self; } sub event_read { From 57fadefaad01739be6cd7f7545de16e04c81309a Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 17 Jan 2012 14:01:30 -0800 Subject: [PATCH 148/405] replicate: retry on short writes to destination Neither write(2), sendto(2), nor any similar syscall is ever guaranteed to have write-in-full behavior on sockets, so we need to manually retry on unwritten portions. This is difficult to reproduce consistently (because the sockets are already in blocking mode), but occasionally gets triggered on weak networks. --- lib/MogileFS/Worker/Replicate.pm | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index eda2aed7..ca2c5683 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -643,11 +643,20 @@ sub http_copy { $remain -= $bytes; $bytes_to_read = $remain if $remain < $bytes_to_read; - my $wbytes = $dsock->send($data); - $written += $wbytes; - return $dest_error->("Error: wrote $wbytes; expected to write $bytes; failed putting to $dpath") - unless $wbytes == $bytes; - $intercopy_cb->(); + my $data_len = $bytes; + my $data_off = 0; + while (1) { + my $wbytes = syswrite($dsock, $data, $data_len, $data_off); + unless (defined $wbytes) { + return $dest_error->("Error: syswrite failed with: $!; failed putting to $dpath"); + } + $written += $wbytes; + $intercopy_cb->(); + last if ($data_len == $wbytes); + + $data_len -= $wbytes; + $data_off += $wbytes; + } die if $bytes_to_read < 0; next if $bytes_to_read; From 215dd9e34d7ae278480416a317efe6e9d5c6f1d2 Mon Sep 17 00:00:00 2001 From: dormando Date: Tue, 28 Feb 2012 17:30:36 -0800 Subject: [PATCH 149/405] note written bytes on replication PUT failure --- 
lib/MogileFS/Worker/Replicate.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index ca2c5683..bcac264b 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -648,7 +648,7 @@ sub http_copy { while (1) { my $wbytes = syswrite($dsock, $data, $data_len, $data_off); unless (defined $wbytes) { - return $dest_error->("Error: syswrite failed with: $!; failed putting to $dpath"); + return $dest_error->("Error: syswrite failed after $written bytes with: $!; failed putting to $dpath"); } $written += $wbytes; $intercopy_cb->(); From 662f33cbc51575ce98bb322d4fa92aa979062a91 Mon Sep 17 00:00:00 2001 From: dormando Date: Tue, 28 Feb 2012 22:46:25 -0800 Subject: [PATCH 150/405] reduce UPDATE's to device table each tracker would update all device rows every 15 seconds. For setups with lots of trackers, lots of devices, or both, this could end up with a huge number of UPDATE's per second. Now it tries pretty hard to update each row at most once every 15 seconds, at the expense of one extra DB read per monitor cycle. --- lib/MogileFS/Worker/Monitor.pm | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index 5e2c6912..e73c4051 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -4,7 +4,6 @@ use warnings; use base 'MogileFS::Worker'; use fields ( - 'last_db_update', # devid -> time. update db less often than poll interval. 'last_test_write', # devid -> time. time we last tried writing to a device.
'skip_host', # hostid -> 1 if already noted dead (reset every loop) 'seen_hosts', # IP -> 1 (reset every loop) @@ -29,7 +28,6 @@ sub new { my $self = fields::new($class); $self->SUPER::new($psock); - $self->{last_db_update} = {}; $self->{last_test_write} = {}; $self->{iow} = MogileFS::IOStatWatcher->new; $self->{prev_data} = { domain => {}, class => {}, host => {}, @@ -75,6 +73,14 @@ sub usage_refresh { debug("Monitor running; scanning usage files"); my $have_dbh = $self->validate_dbh; + my $updateable_devices; + + # See if we should be allowed to update the device table rows. + if ($have_dbh && Mgd::get_store()->get_lock('mgfs:device_update', 0)) { + # Fetch the freshlist list of entries, to avoid excessive writes. + $updateable_devices = { map { $_->{devid} => $_ } + Mgd::get_store()->get_all_devices }; + } $self->{skip_host} = {}; # hostid -> 1 if already noted dead. $self->{seen_hosts} = {}; # IP -> 1 @@ -89,11 +95,16 @@ sub usage_refresh { } $cur_iow->{$dev->id} = $self->{devutil}->{cur}->{$dev->id}; next if $self->{skip_host}{$dev->hostid}; - $self->check_device($dev, $have_dbh) if $dev->dstate->should_monitor; + $self->check_device($dev, $have_dbh, $updateable_devices) + if $dev->dstate->should_monitor; $self->still_alive; # Ping parent if needed so we don't time out # given lots of devices. 
} + if ($have_dbh) { + Mgd::get_store()->release_lock('mgfs:device_update'); + } + $self->{devutil}->{prev} = $cur_iow; # Set the IOWatcher hosts (once old monitor code has been disabled) @@ -304,7 +315,7 @@ sub ua { } sub check_device { - my ($self, $dev, $have_dbh) = @_; + my ($self, $dev, $have_dbh, $updateable_devices) = @_; my $devid = $dev->id; my $host = $dev->host; @@ -367,14 +378,15 @@ sub check_device { } # only update database every ~15 seconds per device - my $last_update = $self->{last_db_update}{$dev->id} || 0; - my $next_update = $last_update + UPDATE_DB_EVERY; my $now = time(); - if ($now >= $next_update && $have_dbh) { - Mgd::get_store()->update_device_usage(mb_total => int($total / 1024), - mb_used => int($used / 1024), - devid => $devid); - $self->{last_db_update}{$devid} = $now; + if ($have_dbh && $updateable_devices) { + my $devrow = $updateable_devices->{$devid}; + my $last = ($devrow && $devrow->{mb_asof}) ? $devrow->{mb_asof} : 0; + if ($last + UPDATE_DB_EVERY < $now) { + Mgd::get_store()->update_device_usage(mb_total => int($total / 1024), + mb_used => int($used / 1024), + devid => $devid); + } } # next if we're not going to try this now From 2e6c35efb65f83982ddff2065997e3374d66803c Mon Sep 17 00:00:00 2001 From: dormando Date: Tue, 28 Feb 2012 22:59:24 -0800 Subject: [PATCH 151/405] Checking in changes prior to tagging of version 2.58. 
Changelog diff is: diff --git a/CHANGES b/CHANGES index 46306bc..824e4a2 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,29 @@ +2012-02-28: Release version 2.58 + + * reduce UPDATE's to device table (dormando ) + + * replicate: retry on short writes to destination (Eric Wong ) + + * enable TCP keepalives for accepted sockets (Eric Wong ) + + * Populate memcache even when noverify is missing (Pyry Hakulinen ) + + * Remove FID from memcache when it is deleted (Pyry Hakulinen ) + + * Unset memc when memcache servers have been removed (Pyry Hakulinen ) + + * Filter out illegal memcache servers from memcache_servers (Pyry Hakulinen ) + + * Add skip_mkcol setting for servers which do not need it (Pyry Hakulinen ) + + * skip_devcount variable that disables devcount updates (Pyry Hakulinen ) + + * Back off if master DB is down (dormando ) + + * use cached setting for slave list (dormando ) + + * fix "fsck never stops" issue (dormando ) + 2012-01-31: Release version 2.57 * Silence a warning about undefined value introduced by the slave database ordering work (hachi@kuiki.net) --- CHANGES | 26 ++++++++++++++++++++++++++ lib/MogileFS/Server.pm | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 46306bc8..824e4a28 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,29 @@ +2012-02-28: Release version 2.58 + + * reduce UPDATE's to device table (dormando ) + + * replicate: retry on short writes to destination (Eric Wong ) + + * enable TCP keepalives for accepted sockets (Eric Wong ) + + * Populate memcache even when noverify is missing (Pyry Hakulinen ) + + * Remove FID from memcache when it is deleted (Pyry Hakulinen ) + + * Unset memc when memcache servers have been removed (Pyry Hakulinen ) + + * Filter out illegal memcache servers from memcache_servers (Pyry Hakulinen ) + + * Add skip_mkcol setting for servers which do not need it (Pyry Hakulinen ) + + * skip_devcount variable that disables devcount updates (Pyry Hakulinen ) + + * Back off if 
master DB is down (dormando ) + + * use cached setting for slave list (dormando ) + + * fix "fsck never stops" issue (dormando ) + 2012-01-31: Release version 2.57 * Silence a warning about undefined value introduced by the slave database ordering work (hachi@kuiki.net) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index ee570723..c4b17bb6 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.57"; +$VERSION = "2.58"; =head1 NAME From 6c32b4c1f4e3ee83e85e049e179d226790d2683d Mon Sep 17 00:00:00 2001 From: dormando Date: Wed, 29 Feb 2012 16:20:27 -0800 Subject: [PATCH 152/405] Postgres wasn't honoring a no-wait timeout passing a 0 timeout to get_lock means "try and give up immediately" --- lib/MogileFS/Store/Postgres.pm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index aa34da8d..6a44ee7e 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -796,11 +796,11 @@ sub get_lock { debug("$$ Locking $lockname ($lockid)\n") if $Mgd::DEBUG >= 5; my $lock = undef; - while($timeout > 0 and not defined($lock)) { + while($timeout >= 0 and not defined($lock)) { $lock = eval { $self->dbh->do('INSERT INTO lock (lockid,hostname,pid,acquiredat) VALUES (?, ?, ?, '.$self->unix_timestamp().')', undef, $lockid, hostname, $$) }; if($self->was_duplicate_error) { $timeout--; - sleep 1; + sleep 1 $timeout > 0; next; } $self->condthrow; From 2a8e6a8af122bbe4509343e5ef425cb8427ae58c Mon Sep 17 00:00:00 2001 From: dormando Date: Wed, 29 Feb 2012 16:23:24 -0800 Subject: [PATCH 153/405] monitor shouldn't unlock unless it got a lock --- lib/MogileFS/Worker/Monitor.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index e73c4051..a5447183 100644 --- 
a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -101,7 +101,7 @@ sub usage_refresh { # given lots of devices. } - if ($have_dbh) { + if ($have_dbh && $updateable_devices) { Mgd::get_store()->release_lock('mgfs:device_update'); } From 1fd53bc3adf3a3a3a379e51f852fdf52406e36ce Mon Sep 17 00:00:00 2001 From: dormando Date: Wed, 29 Feb 2012 16:58:20 -0800 Subject: [PATCH 154/405] don't make SQLite error out on lock calls all of the proper wrappers return 1, so lets make the direct calls callable. --- lib/MogileFS/Store/SQLite.pm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Store/SQLite.pm b/lib/MogileFS/Store/SQLite.pm index 984ebd48..257cd1cc 100644 --- a/lib/MogileFS/Store/SQLite.pm +++ b/lib/MogileFS/Store/SQLite.pm @@ -49,8 +49,8 @@ sub column_type { } # Implement these for native database locking -# sub get_lock {} -# sub release_lock {} +sub get_lock { 1 } +sub release_lock { 1 } # -------------------------------------------------------------------------- # Store-related things we override From 4f9da04fd01e8f563ca6a521bbf7cc51be1e63fe Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 1 Mar 2012 02:00:42 +0000 Subject: [PATCH 155/405] postgres: fix syntax error when getting lock Introduced in commit 6c32b4c1f4e3ee83e85e049e179d226790d2683d --- lib/MogileFS/Store/Postgres.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index 6a44ee7e..30c0ef6e 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -800,7 +800,7 @@ sub get_lock { $lock = eval { $self->dbh->do('INSERT INTO lock (lockid,hostname,pid,acquiredat) VALUES (?, ?, ?, '.$self->unix_timestamp().')', undef, $lockid, hostname, $$) }; if($self->was_duplicate_error) { $timeout--; - sleep 1 $timeout > 0; + sleep 1 if $timeout > 0; next; } $self->condthrow; From 38932fc9be173f813fc03869b9b7d3d2c3072264 Mon Sep 17 00:00:00 2001 From: 
dormando Date: Wed, 29 Feb 2012 20:57:41 -0800 Subject: [PATCH 156/405] Checking in changes prior to tagging of version 2.59. Changelog diff is: diff --git a/CHANGES b/CHANGES index 824e4a2..770e518 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,9 @@ +2012-02-29: Release version 2.59 + + * don't make SQLite error out on lock calls (dormando ) + + * Postgres wasn't honoring a no-wait timeout (dormando ) + 2012-02-28: Release version 2.58 * reduce UPDATE's to device table (dormando ) --- CHANGES | 6 ++++++ lib/MogileFS/Server.pm | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 824e4a28..770e5188 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,9 @@ +2012-02-29: Release version 2.59 + + * don't make SQLite error out on lock calls (dormando ) + + * Postgres wasn't honoring a no-wait timeout (dormando ) + 2012-02-28: Release version 2.58 * reduce UPDATE's to device table (dormando ) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index c4b17bb6..3650b28c 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.58"; +$VERSION = "2.59"; =head1 NAME From b46dc21cb2fe647ad324ce7ee82fbf300455f0ca Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 6 Dec 2011 13:46:07 -0800 Subject: [PATCH 157/405] use TCP keepalive on mogstored connection mogstored can be idle for hours/days at a time between fsck runs, so ensure the connection stays alive to avoid errors and disconnects. 
--- lib/MogileFS/Connection/Mogstored.pm | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/MogileFS/Connection/Mogstored.pm b/lib/MogileFS/Connection/Mogstored.pm index d7f52cb1..46bd2bcf 100644 --- a/lib/MogileFS/Connection/Mogstored.pm +++ b/lib/MogileFS/Connection/Mogstored.pm @@ -1,6 +1,7 @@ package MogileFS::Connection::Mogstored; use strict; use IO::Socket::INET; +use Socket qw(SO_KEEPALIVE); sub new { my ($class, $ip, $port) = @_; @@ -15,9 +16,11 @@ sub new { sub sock { my ($self, $timeout) = @_; return $self->{sock} if $self->{sock}; - return $self->{sock} = IO::Socket::INET->new(PeerAddr => $self->{ip}, - PeerPort => $self->{port}, - Timeout => $timeout); + $self->{sock} = IO::Socket::INET->new(PeerAddr => $self->{ip}, + PeerPort => $self->{port}, + Timeout => $timeout); + $self->{sock}->sockopt(SO_KEEPALIVE, 1); + return $self->{sock}; } sub sock_if_connected { From 7a83940212608fca81a37bc423ea4eb2a22b91cd Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 20 Nov 2011 10:45:41 +0000 Subject: [PATCH 158/405] Mogstored/SideChannelClient: factor out validate_uri We'll be reusing it soon --- lib/Mogstored/SideChannelClient.pm | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/lib/Mogstored/SideChannelClient.pm b/lib/Mogstored/SideChannelClient.pm index 4397a2a9..592d3c01 100644 --- a/lib/Mogstored/SideChannelClient.pm +++ b/lib/Mogstored/SideChannelClient.pm @@ -22,6 +22,15 @@ sub new { return $self; } +sub validate_uri { + my ($self, $uri) = @_; + if ($uri =~ /\.\./) { + $self->write("ERROR: uri invalid (contains ..)\r\n"); + return; + } + $uri; +} + sub event_read { my Mogstored::SideChannelClient $self = shift; @@ -37,12 +46,8 @@ sub event_read { # increase our count $self->{count}++; - # validate uri - my $uri = $1; - if ($uri =~ /\.\./) { - $self->write("ERROR: uri invalid (contains ..)\r\n"); - return; - } + my $uri = $self->validate_uri($1); + return unless defined($uri); # now stat the 
file to get the size and such Perlbal::AIO::aio_stat("$path$uri", sub { From 78a0b6e28197b8749b8608056d80b4dbdf54c7ab Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 20 Nov 2011 10:45:41 +0000 Subject: [PATCH 159/405] Mogstored/SideChannelClient: add command for md5 This can save bandwidth by handling all MD5 calculation on the server side. --- lib/Mogstored/SideChannelClient.pm | 51 ++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/lib/Mogstored/SideChannelClient.pm b/lib/Mogstored/SideChannelClient.pm index 592d3c01..6c741714 100644 --- a/lib/Mogstored/SideChannelClient.pm +++ b/lib/Mogstored/SideChannelClient.pm @@ -8,6 +8,8 @@ use fields ( 'read_buf', # unprocessed read buffer 'mogsvc', # the mogstored Perlbal::Service object ); +use Digest::MD5; +use POSIX qw(O_RDONLY); # needed since we're pretending to be a Perlbal::Socket... never idle out sub max_idle_time { return 0; } @@ -62,6 +64,12 @@ sub event_read { } $self->watch_read(0); Mogstored->iostat_subscribe($self); + } elsif ($cmd =~ /^md5 (\S+)$/) { + my $uri = $self->validate_uri($1); + return unless defined($uri); + + $self->watch_read(0); + $self->md5($path, $uri); } else { # we don't understand this so pass it on to manage command interface my @out; @@ -102,4 +110,47 @@ sub die_gracefully { Mogstored->on_sidechannel_die_gracefully; } +sub md5 { + my ($self, $path, $uri) = @_; + + Perlbal::AIO::aio_open("$path$uri", O_RDONLY, 0, sub { + my $fh = shift; + + if ($self->{closed}) { + CORE::close($fh) if $fh; + return; + } + $fh or return $self->close('aio_open_failure'); + $self->md5_fh($fh, $uri); + }); +} + +sub md5_fh { + my ($self, $fh, $uri) = @_; + my $offset = 0; + my $data = ''; + my $md5 = Digest::MD5->new; + my $total = -s $fh; + my $cb; + + $cb = sub { + unless ($_[0] > 0) { + CORE::close($fh); + return $self->write("ERR read $uri at $offset failed\r\n"); + } + my $bytes = length($data); + $offset += $bytes; + $md5->add($data); + if ($offset >= $total) { + my 
$content_md5 = $md5->b64digest; + $self->write("$uri md5=$content_md5==\r\n"); + CORE::close($fh); + $self->watch_read(1); + } else { + Perlbal::AIO::aio_read($fh, $offset, 0x4000, $data, $cb); + } + }; + Perlbal::AIO::aio_read($fh, $offset, 0x4000, $data, $cb); +} + 1; From 89558853636f523ce066906afe719a5399563b9c Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 20 Nov 2011 11:44:44 +0000 Subject: [PATCH 160/405] Mogstored/SideChannelClient: remove circular reference This was a trivially reproducible memory leak: (while true; do echo md5 /path/to/small/file; done) | nc $HOST $PORT --- lib/Mogstored/SideChannelClient.pm | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/Mogstored/SideChannelClient.pm b/lib/Mogstored/SideChannelClient.pm index 6c741714..39cddad2 100644 --- a/lib/Mogstored/SideChannelClient.pm +++ b/lib/Mogstored/SideChannelClient.pm @@ -135,6 +135,7 @@ sub md5_fh { $cb = sub { unless ($_[0] > 0) { + $cb = undef; CORE::close($fh); return $self->write("ERR read $uri at $offset failed\r\n"); } @@ -144,6 +145,7 @@ sub md5_fh { if ($offset >= $total) { my $content_md5 = $md5->b64digest; $self->write("$uri md5=$content_md5==\r\n"); + $cb = undef; CORE::close($fh); $self->watch_read(1); } else { From 9aa16284413a99e8b084da08c4fab2b829facad6 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 21 Nov 2011 03:36:27 +0000 Subject: [PATCH 161/405] DB changes for checksum support schema_version=15 We'll use the "checksum" table to store checksums of various types, and allow this to be defined on a per-class basis via the "checksumtype" column in the class table. 
--- lib/MogileFS/Store.pm | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 68ac9784..082807cb 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -19,7 +19,8 @@ use List::Util qw(shuffle); # 13: modifies 'server_settings.value' to TEXT for wider values # also adds a TEXT 'arg' column to file_to_queue for passing arguments # 14: modifies 'device' mb_total, mb_used to INT for devs > 16TB -use constant SCHEMA_VERSION => 14; +# 15: adds checksum table, adds 'checksumtype' column to 'class' table +use constant SCHEMA_VERSION => 15; sub new { my ($class) = @_; @@ -497,7 +498,7 @@ use constant TABLES => qw( domain class file tempfile file_to_delete unreachable_fids file_on file_on_corrupt host device server_settings file_to_replicate file_to_delete_later fsck_log file_to_queue - file_to_delete2 ); + file_to_delete2 checksum); sub setup_database { my $sto = shift; @@ -598,7 +599,8 @@ sub TABLE_class { PRIMARY KEY (dmid,classid), classname VARCHAR(50), UNIQUE (dmid,classname), - mindevcount TINYINT UNSIGNED NOT NULL + mindevcount TINYINT UNSIGNED NOT NULL, + checksumtype TINYINT UNSIGNED )" } @@ -804,6 +806,14 @@ sub TABLE_file_to_delete2 { )" } +sub TABLE_checksum { + "CREATE TABLE checksum ( + fid INT UNSIGNED NOT NULL PRIMARY KEY, + checksumtype TINYINT UNSIGNED NOT NULL, + checksum VARCHAR(255) NOT NULL + )" +} + # these five only necessary for MySQL, since no other database existed # before, so they can just create the tables correctly to begin with. 
# in the future, there might be new alters that non-MySQL databases @@ -825,6 +835,13 @@ sub upgrade_add_class_replpolicy { } } +sub upgrade_add_class_checksumtype { + my ($self) = @_; + unless ($self->column_type("class", "checksumtype")) { + $self->dowell("ALTER TABLE class ADD COLUMN checksumtype TINYINT UNSIGNED"); + } +} + # return true if deleted, 0 if didn't exist, exception if error sub delete_host { my ($self, $hostid) = @_; From fb2943237e34e9a45c9a2cb15f992abfaa6d9e0f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 21 Nov 2011 08:15:43 +0000 Subject: [PATCH 162/405] Mogstored/SideChannelClient: remove padding on MD5 Sticking with the defaults Digest::MD5 provides us. For some installations, 2 bytes can add up to a lot in a database table. --- lib/Mogstored/SideChannelClient.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Mogstored/SideChannelClient.pm b/lib/Mogstored/SideChannelClient.pm index 39cddad2..deb2de8f 100644 --- a/lib/Mogstored/SideChannelClient.pm +++ b/lib/Mogstored/SideChannelClient.pm @@ -144,7 +144,7 @@ sub md5_fh { $md5->add($data); if ($offset >= $total) { my $content_md5 = $md5->b64digest; - $self->write("$uri md5=$content_md5==\r\n"); + $self->write("$uri md5=$content_md5\r\n"); $cb = undef; CORE::close($fh); $self->watch_read(1); From 5019488ab04572c069984afab6ce4594e2c4256c Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 21 Nov 2011 08:20:35 +0000 Subject: [PATCH 163/405] Mogstored/SideChannelClient: handle zero-byte files We can rely on EOF to indicate the end of the file without needing to know its size, first. 
--- lib/Mogstored/SideChannelClient.pm | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/lib/Mogstored/SideChannelClient.pm b/lib/Mogstored/SideChannelClient.pm index deb2de8f..012b2e27 100644 --- a/lib/Mogstored/SideChannelClient.pm +++ b/lib/Mogstored/SideChannelClient.pm @@ -130,26 +130,25 @@ sub md5_fh { my $offset = 0; my $data = ''; my $md5 = Digest::MD5->new; - my $total = -s $fh; my $cb; $cb = sub { - unless ($_[0] > 0) { + my $retval = shift; + if ($retval > 0) { + my $bytes = length($data); + $offset += $bytes; + $md5->add($data); + Perlbal::AIO::aio_read($fh, $offset, 0x4000, $data, $cb); + } elsif ($retval == 0) { # EOF $cb = undef; CORE::close($fh); - return $self->write("ERR read $uri at $offset failed\r\n"); - } - my $bytes = length($data); - $offset += $bytes; - $md5->add($data); - if ($offset >= $total) { my $content_md5 = $md5->b64digest; $self->write("$uri md5=$content_md5\r\n"); - $cb = undef; - CORE::close($fh); $self->watch_read(1); } else { - Perlbal::AIO::aio_read($fh, $offset, 0x4000, $data, $cb); + $cb = undef; + CORE::close($fh); + $self->write("ERR read $uri at $offset failed\r\n"); } }; Perlbal::AIO::aio_read($fh, $offset, 0x4000, $data, $cb); From 949ce0fb67d36d5440925dee4c15808112af398f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 21 Nov 2011 08:40:46 +0000 Subject: [PATCH 164/405] Mogstored/SideChannelClient: md5 returns -1 on open() failure This is to be consistent with the "size" command, we no longer shut a client down because of a missing file (possibly due to a DELETE race). Also, flip watch_read more consistently and try to keep it in the md5_* family of functions. 
--- lib/Mogstored/SideChannelClient.pm | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/Mogstored/SideChannelClient.pm b/lib/Mogstored/SideChannelClient.pm index 012b2e27..78e56ada 100644 --- a/lib/Mogstored/SideChannelClient.pm +++ b/lib/Mogstored/SideChannelClient.pm @@ -68,7 +68,6 @@ sub event_read { my $uri = $self->validate_uri($1); return unless defined($uri); - $self->watch_read(0); $self->md5($path, $uri); } else { # we don't understand this so pass it on to manage command interface @@ -113,15 +112,20 @@ sub die_gracefully { sub md5 { my ($self, $path, $uri) = @_; + $self->watch_read(0); Perlbal::AIO::aio_open("$path$uri", O_RDONLY, 0, sub { my $fh = shift; if ($self->{closed}) { - CORE::close($fh) if $fh; - return; + CORE::close($fh) if $fh; + return; + } + if ($fh) { + $self->md5_fh($fh, $uri); + } else { + $self->watch_read(1); + $self->write("$uri md5=-1\r\n"); } - $fh or return $self->close('aio_open_failure'); - $self->md5_fh($fh, $uri); }); } From f670b4572db669c7e9e282683707f67dc0cb51a4 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 22 Nov 2011 08:09:20 +0000 Subject: [PATCH 165/405] HTTPFile: add md5 method for getting 128-bit checksum We'll try to hit the mogstored stream port and fall back to HTTP if the mogstored stream port does not work. 
--- lib/MogileFS/HTTPFile.pm | 107 +++++++++++++++++++++++++++++++++++++ t/40-httpfile.t | 112 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 219 insertions(+) create mode 100644 t/40-httpfile.t diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index 8fbe031a..8435a9cc 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -3,6 +3,8 @@ use strict; use warnings; use Carp qw(croak); use Socket qw(PF_INET IPPROTO_TCP SOCK_STREAM); +use Digest::MD5; +use MIME::Base64; use MogileFS::Server; use MogileFS::Util qw(error undeferr wait_for_readability wait_for_writeability); @@ -137,4 +139,109 @@ sub size { } } +sub md5_mgmt { + my ($self, $ping_cb) = @_; + my $mogconn = $self->host->mogstored_conn; + my $node_timeout = MogileFS->config("node_timeout"); + my $sock; + my $rv; + my $expiry; + + my $uri = $self->{uri}; + my $req = "md5 $uri\r\n"; + my $reqlen = length $req; + + # a dead/stale socket may not be detected until we try to recv on it + # after sending a request + my $retries = 2; + + # assuming the storage node can MD5 at >=2MB/s, low expectations here + my $response_timeout = $self->size / (2 * 1024 * 1024); + + my $flag_nosignal = MogileFS::Sys->flag_nosignal; + local $SIG{'PIPE'} = "IGNORE" unless $flag_nosignal; + +retry: + $sock = $mogconn->sock($node_timeout) or return; + $rv = send($sock, $req, $flag_nosignal); + if ($! || $rv != $reqlen) { + my $err = $!; + $mogconn->mark_dead; + if ($retries-- <= 0) { + $err = $err ? "send() error (md5 $uri): $err" : + "short send() (md5 $uri): $rv != $reqlen"; + $err = $mogconn->{ip} . ":" . $mogconn->{port} . " $err"; + return undeferr($err); + } + goto retry; + } + + $expiry = Time::HiRes::time() + $response_timeout; + while (!wait_for_readability(fileno($sock), 1.0) && + (Time::HiRes::time() > $expiry)) { + $ping_cb->(); + } + + $rv = <$sock>; + if (! 
$rv) { + $mogconn->mark_dead; + return undeferr("EOF from mogstored") if ($retries-- <= 0); + goto retry; + } elsif ($rv =~ /^\Q$uri\E md5=(\S+)\r\n/) { + my $md5 = $1; + + if ($md5 eq FILE_MISSING) { + # FIXME, this could be another error like EMFILE/ENFILE + return FILE_MISSING; + } + if (length($md5) == 22) { + # Digest::MD5->b64digest on mogstored does not pad + return decode_base64("$md5=="); + } + } elsif ($rv =~ /^ERROR /) { + return; # old server, fallback to HTTP + } + return undeferr("mogstored failed to handle (md5 $uri)"); +} + +sub md5_http { + my ($self, $ping_cb) = @_; + + # don't SIGPIPE us (why don't we just globally ignore SIGPIPE?) + my $flag_nosignal = MogileFS::Sys->flag_nosignal; + local $SIG{'PIPE'} = "IGNORE" unless $flag_nosignal; + + # TODO: refactor + my $node_timeout = MogileFS->config("node_timeout"); + # Hardcoded connection cache size of 20 :( + $user_agent ||= LWP::UserAgent->new(timeout => $node_timeout, keep_alive => 20); + my $digest = Digest::MD5->new; + + my %opts = ( + # default (4K) is tiny, we can try bigger, maybe 1M like replicate + ':read_size_hint' => 0x4000, + ':content_cb' => sub { + $digest->add($_[0]); + $ping_cb->(); + } + ); + + my $path = $self->{url}; + my $res = $user_agent->get($path, %opts); + + return $digest->digest if $res->is_success; + return FILE_MISSING if $res->code == 404; + return undeferr("Failed MD5 (GET) check for $path (" . $res->code . "): " + . 
$res->message); +} + +sub md5 { + my ($self, $ping_cb) = @_; + my $md5 = $self->md5_mgmt($ping_cb); + + return $md5 if ($md5 && $md5 ne FILE_MISSING); + + $self->md5_http($ping_cb); +} + 1; diff --git a/t/40-httpfile.t b/t/40-httpfile.t new file mode 100644 index 00000000..02157154 --- /dev/null +++ b/t/40-httpfile.t @@ -0,0 +1,112 @@ +# -*-perl-*- + +use strict; +use warnings; +use Test::More; +use FindBin qw($Bin); + +use MogileFS::Server; +use MogileFS::Util qw(error_code); +use MogileFS::Test; + +find_mogclient_or_skip(); + +my $sto = eval { temp_store(); }; +if ($sto) { + plan tests => 13; +} else { + plan skip_all => "Can't create temporary test database: $@"; + exit 0; +} + +my $dbh = $sto->dbh; +my $rv; + +my ($hostA_ip) = (qw/127.0.1.1/); + +use File::Temp; +my %mogroot; +$mogroot{1} = File::Temp::tempdir( CLEANUP => 1 ); +my $dev2host = { 1 => 1 }; +foreach (sort { $a <=> $b } keys %$dev2host) { + my $root = $mogroot{$dev2host->{$_}}; + mkdir("$root/dev$_") or die "Failed to create dev$_ dir: $!"; +} + +my $ms1 = create_mogstored($hostA_ip, $mogroot{1}); + +while (! 
-e "$mogroot{1}/dev1/usage") { + print "Waiting on usage...\n"; + sleep 1; +} + +my $tmptrack = create_temp_tracker($sto); +ok($tmptrack); + +my $mogc = MogileFS::Client->new( + domain => "testdom", + hosts => [ "127.0.0.1:7001" ], + ); +my $be = $mogc->{backend}; # gross, reaching inside of MogileFS::Client + +# test some basic commands to backend +ok($be->do_request("test", {}), "test ping worked"); + +ok($tmptrack->mogadm("domain", "add", "testdom"), "created test domain"); +ok($tmptrack->mogadm("class", "add", "testdom", "1copy", "--mindevcount=1"), "created 1copy class in testdom"); + +ok($tmptrack->mogadm("host", "add", "hostA", "--ip=$hostA_ip", "--status=alive"), "created hostA"); + +ok($tmptrack->mogadm("device", "add", "hostA", 1), "created dev1 on hostA"); + +# wait for monitor +{ + my $was = $be->{timeout}; # can't use local on phash :( + $be->{timeout} = 10; + ok($be->do_request("do_monitor_round", {}), "waited for monitor") + or die "Failed to wait for monitor"; + ok($be->do_request("do_monitor_round", {}), "waited for monitor") + or die "Failed to wait for monitor"; + $be->{timeout} = $was; +} + +# create a file +my $fh = $mogc->new_file("file", "1copy") + or die "Failed to create file: " . 
$mogc->errstr; +my $data = "DATA"; +print $fh $data; +close($fh) or die "Failed to close file"; +my @paths = $mogc->get_paths("file"); + +use MogileFS::Device; +use MogileFS::Host; +use MogileFS::Config; +use MogileFS::Rebalance; +use MogileFS::Factory::Host; +use MogileFS::Factory::Device; +use Digest::MD5 qw/md5/; + +my $dfac = MogileFS::Factory::Device->get_factory; +my $hfac = MogileFS::Factory::Host->get_factory; + +map { $hfac->set($_) } $sto->get_all_hosts; +map { $dfac->set($_) } $sto->get_all_devices; +my @devs = $dfac->get_all; + +### Hacks to make tests work :/ +$MogileFS::Config::skipconfig = 1; +MogileFS::Config->load_config; + +my $file = MogileFS::HTTPFile->at($paths[0]); +my $md5_digest; + +$md5_digest = $file->md5_mgmt(sub {}); +ok($md5_digest eq md5("DATA"), "mgmt only"); +my $cb_called = 0; +$md5_digest = $file->md5_http(sub { $cb_called++ }); +ok(1 == $cb_called, "ping callback called"); +ok($md5_digest eq md5("DATA"), "http only"); + +$md5_digest = $file->md5(sub {}); +ok($md5_digest eq md5("DATA"), "mgmt or http"); +ok(length($md5_digest) == 16, "MD5 is 16 bytes (128 bits)"); From 2cef1215839217c2dd84cc3f02b2ff52fc3a104a Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 22 Nov 2011 08:29:06 +0000 Subject: [PATCH 166/405] Mogstored/SideChannelClient: prevent EMFILE/EMFILE DoS from md5 This isn't meant to be /that/ secure, but do not allow MD5s to be calculated in parallel because the client can /really/ pipeline the hell out of us and cause us to exhaust our open descriptors. 
--- lib/Mogstored/SideChannelClient.pm | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/lib/Mogstored/SideChannelClient.pm b/lib/Mogstored/SideChannelClient.pm index 78e56ada..0cc94be9 100644 --- a/lib/Mogstored/SideChannelClient.pm +++ b/lib/Mogstored/SideChannelClient.pm @@ -39,7 +39,11 @@ sub event_read { my $bref = $self->read(1024); return $self->close unless defined $bref; $self->{read_buf} .= $$bref; + $self->read_buf_consume; +} +sub read_buf_consume { + my $self = shift; my $path = $self->{mogsvc}->{docroot}; while ($self->{read_buf} =~ s/^(.+?)\r?\n//) { @@ -68,7 +72,7 @@ sub event_read { my $uri = $self->validate_uri($1); return unless defined($uri); - $self->md5($path, $uri); + return $self->md5($path, $uri); } else { # we don't understand this so pass it on to manage command interface my @out; @@ -123,8 +127,8 @@ sub md5 { if ($fh) { $self->md5_fh($fh, $uri); } else { - $self->watch_read(1); $self->write("$uri md5=-1\r\n"); + $self->after_long_request; } }); } @@ -148,14 +152,25 @@ sub md5_fh { CORE::close($fh); my $content_md5 = $md5->b64digest; $self->write("$uri md5=$content_md5\r\n"); - $self->watch_read(1); + $self->after_long_request; } else { $cb = undef; CORE::close($fh); $self->write("ERR read $uri at $offset failed\r\n"); + $self->after_long_request; # should we try to continue? } }; Perlbal::AIO::aio_read($fh, $offset, 0x4000, $data, $cb); } +sub after_long_request { + my $self = shift; + + if ($self->{read_buf} =~ /^(.+?)\r?\n/) { + $self->read_buf_consume; + } else { + $self->watch_read(1); + } +} + 1; From 4b97e0d32f022b7744a170053eb47f297de4f062 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 22 Nov 2011 10:24:09 +0000 Subject: [PATCH 167/405] store: add functions for binary checksum storage Binary columns are the most space-efficient way to represent checksum hashes. MD5s are only 16 bytes, and our 64-byte column can support SHA512 if we ever wanted to. 
We have unit tests that pass under MySQL, Postgres, and SQLite. --- lib/MogileFS/Store.pm | 35 +++++++++++++++++++++++++++++++++- lib/MogileFS/Store/Postgres.pm | 35 ++++++++++++++++++++++++++++++++++ lib/MogileFS/Store/SQLite.pm | 3 ++- t/store.t | 19 ++++++++++++++---- 4 files changed, 86 insertions(+), 6 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 082807cb..d5638eb0 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -810,7 +810,7 @@ sub TABLE_checksum { "CREATE TABLE checksum ( fid INT UNSIGNED NOT NULL PRIMARY KEY, checksumtype TINYINT UNSIGNED NOT NULL, - checksum VARCHAR(255) NOT NULL + checksum VARBINARY(64) NOT NULL )" } @@ -2142,6 +2142,39 @@ sub random_fids_on_device { return @some_fids; } +sub BLOB_BIND_TYPE { undef; } + +sub set_checksum { + my ($self, $fidid, $checksumtype, $checksum) = @_; + my $dbh = $self->dbh; + die "Your database does not support REPLACE! Reimplement set_checksum!" unless $self->can_replace; + + eval { + my $sth = $dbh->prepare("REPLACE INTO checksum " . + "(fid, checksumtype, checksum) " . + "VALUES (?, ?, ?)"); + $sth->bind_param(1, $fidid); + $sth->bind_param(2, $checksumtype); + $sth->bind_param(3, $checksum, BLOB_BIND_TYPE); + $sth->execute; + }; + $self->condthrow; +} + +sub get_checksum { + my ($self, $fidid) = @_; + + $self->dbh->selectrow_hashref("SELECT fid, checksumtype, checksum " . 
+ "FROM checksum WHERE fid = ?", + undef, $fidid); +} + +sub delete_checksum { + my ($self, $fidid) = @_; + + $self->dbh->do("DELETE FROM checksum WHERE fid = ?", undef, $fidid); +} + 1; __END__ diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index 30c0ef6e..28c729cc 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -110,6 +110,7 @@ sub setup_database { sub filter_create_sql { my ($self, $sql) = @_; $sql =~ s/\bUNSIGNED\b//g; + $sql =~ s/\bVARBINARY\(\d+\)/bytea/g; $sql =~ s/\b(?:TINY|MEDIUM)INT\b/SMALLINT/g; $sql =~ s/\bINT\s+NOT\s+NULL\s+AUTO_INCREMENT\b/SERIAL/g; $sql =~ s/# /-- /g; @@ -830,6 +831,40 @@ sub release_lock { return $rv; } +sub BLOB_BIND_TYPE { { pg_type => PG_BYTEA } } + +sub set_checksum { + my ($self, $fidid, $checksumtype, $checksum) = @_; + my $dbh = $self->dbh; + + $dbh->begin_work; + eval { + my $sth = $dbh->prepare("INSERT INTO checksum " . + "(fid, checksumtype, checksum) ". + "VALUES (?, ?, ?)"); + $sth->bind_param(1, $fidid); + $sth->bind_param(2, $checksumtype); + $sth->bind_param(3, $checksum, BLOB_BIND_TYPE); + $sth->execute; + }; + if ($@ || $dbh->err) { + if ($self->was_duplicate_error) { + eval { + my $sth = $dbh->prepare("UPDATE checksum " . + "SET checksumtype = ?, checksum = ? " . 
+ "WHERE fid = ?"); + $sth->bind_param(1, $checksumtype); + $sth->bind_param(2, $checksum, BLOB_BIND_TYPE); + $sth->bind_param(3, $fidid); + $sth->execute; + }; + $self->condthrow; + } + } + $dbh->commit; + $self->condthrow; +} + 1; __END__ diff --git a/lib/MogileFS/Store/SQLite.pm b/lib/MogileFS/Store/SQLite.pm index 257cd1cc..b9bb4c39 100644 --- a/lib/MogileFS/Store/SQLite.pm +++ b/lib/MogileFS/Store/SQLite.pm @@ -1,7 +1,7 @@ package MogileFS::Store::SQLite; use strict; use warnings; -use DBI; +use DBI qw(:sql_types); use DBD::SQLite 1.13; use MogileFS::Util qw(throw); use base 'MogileFS::Store'; @@ -254,6 +254,7 @@ sub note_done_replicating { my ($self, $fidid) = @_; } +sub BLOB_BIND_TYPE { SQL_BLOB } 1; diff --git a/t/store.t b/t/store.t index 593f1ca5..f2ece189 100644 --- a/t/store.t +++ b/t/store.t @@ -11,7 +11,7 @@ use MogileFS::Test; my $sto = eval { temp_store(); }; if ($sto) { - plan tests => 21; + plan tests => 30; } else { plan skip_all => "Can't create temporary test database: $@"; exit 0; @@ -176,9 +176,20 @@ ok ( "retry_on_deadlock got proper return value and iteration while inducing a deadlock" ); +use Digest::MD5 qw(md5); +$sto->set_checksum(6, 1, md5("FOO")); +my $hash = $sto->get_checksum(6); +ok($hash->{checksum} eq md5("FOO"), "checksum matches expected"); +ok($hash->{fid} == 6, "checksum fid set correctly"); +ok($hash->{checksumtype} == 1, "checksumtype set correctly"); +$sto->set_checksum(6, 2, md5("MOO")); +$hash = $sto->get_checksum(6); +ok($hash->{checksum} eq md5("MOO"), "checksum matches expected"); +ok($hash->{fid} == 6, "checksum fid set correctly"); +ok($hash->{checksumtype} == 2, "checksumtype set correctly"); - - - +ok(1 == $sto->delete_checksum(6), "checksum deleted OK"); +ok(0 == $sto->delete_checksum(6), "checksum delete MISS"); +ok(!defined $sto->get_checksum(6), "undef on missing checksum"); From 853b6c6e719d0b7a78dca0feda94d60d81e710b9 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 22 Nov 2011 11:24:15 +0000 Subject: 
[PATCH 168/405] t/httpfile: more tests for larger (100M) file We need to ensure we're sane when dealing with larger files requiring multiple reads. --- t/40-httpfile.t | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/t/40-httpfile.t b/t/40-httpfile.t index 02157154..895f73c6 100644 --- a/t/40-httpfile.t +++ b/t/40-httpfile.t @@ -13,7 +13,7 @@ find_mogclient_or_skip(); my $sto = eval { temp_store(); }; if ($sto) { - plan tests => 13; + plan tests => 16; } else { plan skip_all => "Can't create temporary test database: $@"; exit 0; @@ -110,3 +110,20 @@ ok($md5_digest eq md5("DATA"), "http only"); $md5_digest = $file->md5(sub {}); ok($md5_digest eq md5("DATA"), "mgmt or http"); ok(length($md5_digest) == 16, "MD5 is 16 bytes (128 bits)"); + +my $size = 100 * 1024 * 1024; +$fh = $mogc->new_file("largefile", "1copy") + or die "Failed to create largefile: " . $mogc->errstr; +$data = "LARGE" x 20; +my $expect = Digest::MD5->new; +foreach my $i (1..(1024 * 1024)) { + $expect->add($data); + print $fh $data or die "failed to write chunk $i for largefile"; +} +close($fh) or die "Failed to close largefile"; +$expect = $expect->digest; +@paths = $mogc->get_paths("largefile"); +$file = MogileFS::HTTPFile->at($paths[0]); +ok($size == $file->size, "big file size match $size"); +ok($file->md5_mgmt(sub {}) eq $expect, "md5_mgmt on big file"); +ok($file->md5_http(sub {}) eq $expect, "md5_http on big file"); From 6bf871429baad03d14ea62a5530b2b54a71ad65b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 22 Nov 2011 11:39:34 +0000 Subject: [PATCH 169/405] increase MD5 buffers to 1 megabyte (from 16K) This matches the buffer size used by replication, and showed a performance increase when timing the 100M large file test in t/40-httpfile.t With the following patch, I was able to note a ~46 -> ~27s time difference with both MD5 methods using this change to increase buffer sizes. 
--- a/t/40-httpfile.t +++ b/t/40-httpfile.t @@ -125,5 +125,12 @@ $expect = $expect->digest; @paths = $mogc->get_paths("largefile"); $file = MogileFS::HTTPFile->at($paths[0]); ok($size == $file->size, "big file size match $size"); +use Time::HiRes qw/tv_interval gettimeofday/; + +my $t0; +$t0 = [gettimeofday]; ok($file->md5_mgmt(sub {}) eq $expect, "md5_mgmt on big file"); +print "mgmt ", tv_interval($t0), "\n"; +$t0 = [gettimeofday]; ok($file->md5_http(sub {}) eq $expect, "md5_http on big file"); +print "http ", tv_interval($t0), "\n"; --- lib/MogileFS/HTTPFile.pm | 4 ++-- lib/Mogstored/SideChannelClient.pm | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index 8435a9cc..8f95721a 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -218,8 +218,8 @@ sub md5_http { my $digest = Digest::MD5->new; my %opts = ( - # default (4K) is tiny, we can try bigger, maybe 1M like replicate - ':read_size_hint' => 0x4000, + # default (4K) is tiny, use 1M like replicate + ':read_size_hint' => 0x100000, ':content_cb' => sub { $digest->add($_[0]); $ping_cb->(); diff --git a/lib/Mogstored/SideChannelClient.pm b/lib/Mogstored/SideChannelClient.pm index 0cc94be9..f30dddb7 100644 --- a/lib/Mogstored/SideChannelClient.pm +++ b/lib/Mogstored/SideChannelClient.pm @@ -146,7 +146,7 @@ sub md5_fh { my $bytes = length($data); $offset += $bytes; $md5->add($data); - Perlbal::AIO::aio_read($fh, $offset, 0x4000, $data, $cb); + Perlbal::AIO::aio_read($fh, $offset, 0x100000, $data, $cb); } elsif ($retval == 0) { # EOF $cb = undef; CORE::close($fh); @@ -160,7 +160,7 @@ sub md5_fh { $self->after_long_request; # should we try to continue? 
} }; - Perlbal::AIO::aio_read($fh, $offset, 0x4000, $data, $cb); + Perlbal::AIO::aio_read($fh, $offset, 0x100000, $data, $cb); } sub after_long_request { From 175e328f0d259576ca6fe3c07016dc73bea18862 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 23 Nov 2011 00:42:34 +0000 Subject: [PATCH 170/405] side_channel: switch to hexdigest for exchanging md5 checksums Base64 requires further escaping for our tracker protocol which gets ugly and confusing. It's also easier to interact/verify with existing command-line tools using hex. --- lib/MogileFS/HTTPFile.pm | 6 ++---- lib/Mogstored/SideChannelClient.pm | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index 8f95721a..56ebeaee 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -4,7 +4,6 @@ use warnings; use Carp qw(croak); use Socket qw(PF_INET IPPROTO_TCP SOCK_STREAM); use Digest::MD5; -use MIME::Base64; use MogileFS::Server; use MogileFS::Util qw(error undeferr wait_for_readability wait_for_writeability); @@ -194,9 +193,8 @@ retry: # FIXME, this could be another error like EMFILE/ENFILE return FILE_MISSING; } - if (length($md5) == 22) { - # Digest::MD5->b64digest on mogstored does not pad - return decode_base64("$md5=="); + if (length($md5) == 32) { + return pack("H*", $md5); } } elsif ($rv =~ /^ERROR /) { return; # old server, fallback to HTTP diff --git a/lib/Mogstored/SideChannelClient.pm b/lib/Mogstored/SideChannelClient.pm index f30dddb7..6c41c61d 100644 --- a/lib/Mogstored/SideChannelClient.pm +++ b/lib/Mogstored/SideChannelClient.pm @@ -150,8 +150,8 @@ sub md5_fh { } elsif ($retval == 0) { # EOF $cb = undef; CORE::close($fh); - my $content_md5 = $md5->b64digest; - $self->write("$uri md5=$content_md5\r\n"); + $md5 = $md5->hexdigest; + $self->write("$uri md5=$md5\r\n"); $self->after_long_request; } else { $cb = undef; From 857e5da05d1a99c2d90fbe99d05893b52ee32ee3 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 
23 Nov 2011 01:13:16 +0000 Subject: [PATCH 171/405] add MogileFS::Checksum class We need a place to store mappings for various checksum types we'll support. --- lib/MogileFS/Checksum.pm | 45 ++++++++++++++++++++++++++++++++++++++++ t/checksum.t | 25 ++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 lib/MogileFS/Checksum.pm create mode 100644 t/checksum.t diff --git a/lib/MogileFS/Checksum.pm b/lib/MogileFS/Checksum.pm new file mode 100644 index 00000000..410fdd01 --- /dev/null +++ b/lib/MogileFS/Checksum.pm @@ -0,0 +1,45 @@ +package MogileFS::Checksum; +use strict; +use warnings; +use overload '""' => \&as_string; + +our %NAME2TYPE = ( + md5 => 1, +); + +our %TYPE2NAME = map { $NAME2TYPE{$_} => $_} keys(%NAME2TYPE); + +sub new { + my ($class, $row) = @_; + my $self = bless { + fidid => $row->{fid}, + checksum => $row->{checksum}, + checksumtype => $row->{checksumtype} + }, $class; + + return $self; +} + +sub checksumname { + my $self = shift; + my $type = $self->{checksumtype}; + my $name = $TYPE2NAME{$type} or die "checksumtype=$type unknown"; + + return $name; +} + +sub hexdigest { + my $self = shift; + + unpack("H*", $self->{checksum}); +} + +sub as_string { + my $self = shift; + my $name = $self->checksumname; + my $hexdigest = $self->hexdigest; + + "Checksum[f=$self->{fidid};$name=$hexdigest]" +} + +1; diff --git a/t/checksum.t b/t/checksum.t new file mode 100644 index 00000000..f5ae65d0 --- /dev/null +++ b/t/checksum.t @@ -0,0 +1,25 @@ +# -*-perl-*- + +use strict; +use warnings; +use Test::More; + +use MogileFS::Server; +use MogileFS::Util qw(error_code); +use MogileFS::Test; +use MogileFS::Checksum; +use Digest::MD5 qw(md5 md5_hex); + +my $sto = eval { temp_store(); }; +if ($sto) { + plan tests => 2; +} else { + plan skip_all => "Can't create temporary test database: $@"; + exit 0; +} + +$sto->set_checksum(6, 1, md5("asdf")); +my $hash = $sto->get_checksum(6); +my $csum = MogileFS::Checksum->new($hash); +is(md5_hex("asdf"), 
$csum->hexdigest); +is("md5", $csum->checksumname); From bf24853510613e49c28c247915157b0ad99793dd Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 23 Nov 2011 02:05:50 +0000 Subject: [PATCH 172/405] store: update class table with checksumtype column This is needed to wire up checksums to classes. --- lib/MogileFS/Store.pm | 1 + lib/MogileFS/Store/SQLite.pm | 2 ++ 2 files changed, 3 insertions(+) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index d5638eb0..b0199950 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -533,6 +533,7 @@ sub setup_database { $sto->upgrade_modify_server_settings_value; $sto->upgrade_add_file_to_queue_arg; $sto->upgrade_modify_device_size; + $sto->upgrade_add_class_checksumtype; return 1; } diff --git a/lib/MogileFS/Store/SQLite.pm b/lib/MogileFS/Store/SQLite.pm index b9bb4c39..3cf6ee5e 100644 --- a/lib/MogileFS/Store/SQLite.pm +++ b/lib/MogileFS/Store/SQLite.pm @@ -111,6 +111,7 @@ sub TABLE_class { classid TINYINT UNSIGNED NOT NULL, classname VARCHAR(50), mindevcount TINYINT UNSIGNED NOT NULL, + checksumtype TINYINT UNSIGNED, UNIQUE (dmid,classid), UNIQUE (dmid,classname) )" @@ -242,6 +243,7 @@ sub upgrade_add_device_drain { sub upgrade_modify_server_settings_value { 1 } sub upgrade_add_file_to_queue_arg { 1 } sub upgrade_modify_device_size { 1 } +sub upgrade_add_class_checksumtype { 1 } # inefficient, but no warning and no locking sub should_begin_replicating_fidid { From c49791201f146b85573b0be3200bdf61add35d50 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 23 Nov 2011 21:25:15 +0000 Subject: [PATCH 173/405] replicate: optional digest support Digest::MD5 and Digest::SHA1 both support the same API for streaming data for the calculation, so we can validate our content as we stream it. 
--- lib/MogileFS/Worker/Replicate.pm | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index bcac264b..6035ad0b 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -302,6 +302,7 @@ sub replicate { my $errref = delete $opts{'errref'}; my $no_unlock = delete $opts{'no_unlock'}; + my $digest = delete $opts{'digest'}; my $fixed_source = delete $opts{'source_devid'}; my $mask_devids = delete $opts{'mask_devids'} || {}; my $avoid_devids = delete $opts{'avoid_devids'} || {}; @@ -481,6 +482,7 @@ sub replicate { expected_len => undef, # FIXME: get this info to pass along errref => \$copy_err, callback => sub { $worker->still_alive; }, + digest => $digest, ); die "Bogus error code: $copy_err" if !$rv && $copy_err !~ /^(?:src|dest)_error$/; @@ -524,7 +526,7 @@ sub replicate { # copies a file from one Perlbal to another utilizing HTTP sub http_copy { my %opts = @_; - my ($sdevid, $ddevid, $fid, $rfid, $expected_clen, $intercopy_cb, $errref) = + my ($sdevid, $ddevid, $fid, $rfid, $expected_clen, $intercopy_cb, $errref, $digest) = map { delete $opts{$_} } qw(sdevid ddevid fid @@ -532,6 +534,7 @@ sub http_copy { expected_len callback errref + digest ); die if %opts; @@ -642,6 +645,7 @@ sub http_copy { # now we've read in $bytes bytes $remain -= $bytes; $bytes_to_read = $remain if $remain < $bytes_to_read; + $digest->add($data) if $digest; my $data_len = $bytes; my $data_off = 0; From 380f21c7b21f359b44eee07d9eea3bfaba529ca0 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 23 Nov 2011 21:41:25 +0000 Subject: [PATCH 174/405] class: wire up checksum support to this Checksum usage will be decided on a per-class basis. 
--- lib/MogileFS/Class.pm | 3 +++ lib/MogileFS/Store.pm | 22 ++++++++++++++++++---- t/01-domain-class.t | 12 +++++++++--- 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/lib/MogileFS/Class.pm b/lib/MogileFS/Class.pm index b1861b79..bf9db55b 100644 --- a/lib/MogileFS/Class.pm +++ b/lib/MogileFS/Class.pm @@ -2,6 +2,7 @@ package MogileFS::Class; use strict; use warnings; use MogileFS::Util qw(throw); +use MogileFS::Checksum; =head1 @@ -24,6 +25,8 @@ sub id { $_[0]{classid} } sub name { $_[0]{classname} } sub mindevcount { $_[0]{mindevcount} } sub dmid { $_[0]{dmid} } +sub checksumtype { $_[0]{checksumtype} } +sub checksumname { $MogileFS::Checksum::TYPE2NAME{$_[0]{checksumtype}} } sub repl_policy_string { my $self = shift; diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index b0199950..75978350 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -946,6 +946,17 @@ sub update_class_replpolicy { return 1; } +# return 1 on success, die otherwise +sub update_class_checksumtype { + my $self = shift; + my %arg = $self->_valid_params([qw(dmid classid checksumtype)], @_); + eval { + $self->dbh->do("UPDATE class SET checksumtype=? WHERE dmid=? AND classid=?", + undef, $arg{checksumtype}, $arg{dmid}, $arg{classid}); + }; + $self->condthrow; +} + sub nfiles_with_dmid_classid_devcount { my ($self, $dmid, $classid, $devcount) = @_; return $self->dbh->selectrow_array('SELECT COUNT(*) FROM file WHERE dmid = ? AND classid = ? 
AND devcount = ?', @@ -1314,12 +1325,15 @@ sub get_all_classes { my ($self) = @_; my (@ret, $row); - my $repl_col = ""; + my @cols = qw/dmid classid classname mindevcount/; if ($self->cached_schema_version >= 10) { - $repl_col = ", replpolicy"; + push @cols, 'replpolicy'; + if ($self->cached_schema_version >= 15) { + push @cols, 'checksumtype'; + } } - - my $sth = $self->dbh->prepare("SELECT dmid, classid, classname, mindevcount $repl_col FROM class"); + my $cols = join(', ', @cols); + my $sth = $self->dbh->prepare("SELECT $cols FROM class"); $sth->execute; push @ret, $row while $row = $sth->fetchrow_hashref; return @ret; diff --git a/t/01-domain-class.t b/t/01-domain-class.t index 51e5088a..ccf8f516 100644 --- a/t/01-domain-class.t +++ b/t/01-domain-class.t @@ -18,7 +18,7 @@ use Data::Dumper qw/Dumper/; my $sto = eval { temp_store(); }; if ($sto) { - plan tests => 33; + plan tests => 35; } else { plan skip_all => "Can't create temporary test database: $@"; exit 0; @@ -128,6 +128,10 @@ ok($domfac != $classfac, "factories are not the same singleton"); replpolicy => 'MultipleHosts(6)'), 'can set replpolicy'); ok($sto->update_class_name(dmid => $domid, classid => $clsid2, classname => 'boo'), 'can rename class'); + ok($sto->update_class_checksumtype(dmid => $domid, classid => $clsid2, + checksumtype => 1), 'can set checksum type'); + ok($sto->update_class_checksumtype(dmid => $domid, classid => $clsid2, + checksumtype => undef), 'can unset checksum type'); } { @@ -141,7 +145,8 @@ ok($domfac != $classfac, "factories are not the same singleton"); 'dmid' => '1', 'classid' => '1', 'mindevcount' => '2', - 'classname' => 'bar' + 'classname' => 'bar', + 'checksumtype' => undef, }, 'class bar came back'); # We edited class2 a bunch, make sure that all stuck. 
is_deeply($classes[1], { @@ -149,6 +154,7 @@ ok($domfac != $classfac, "factories are not the same singleton"); 'dmid' => '1', 'classid' => '2', 'mindevcount' => '3', - 'classname' => 'boo' + 'classname' => 'boo', + 'checksumtype' => undef, }, 'class baz came back as boo'); } From cc3a551b65dfe5003aafa683cb7f7aa4d213b40e Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 24 Nov 2011 08:59:29 +0000 Subject: [PATCH 175/405] t/40-httpfile.t: speedup test with working clear_cache This branch is now rebased against my latest clear_cache which allows much faster metadata updates for testing. --- t/40-httpfile.t | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/40-httpfile.t b/t/40-httpfile.t index 895f73c6..481cba64 100644 --- a/t/40-httpfile.t +++ b/t/40-httpfile.t @@ -63,9 +63,9 @@ ok($tmptrack->mogadm("device", "add", "hostA", 1), "created dev1 on hostA"); { my $was = $be->{timeout}; # can't use local on phash :( $be->{timeout} = 10; - ok($be->do_request("do_monitor_round", {}), "waited for monitor") + ok($be->do_request("clear_cache", {}), "waited for monitor") or die "Failed to wait for monitor"; - ok($be->do_request("do_monitor_round", {}), "waited for monitor") + ok($be->do_request("clear_cache", {}), "waited for monitor") or die "Failed to wait for monitor"; $be->{timeout} = $was; } From 81e791700bcdf94f577a5e1062a509491e137509 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 25 Nov 2011 04:40:38 +0000 Subject: [PATCH 176/405] doc: add checksums.txt for basic design/implementation notes Helps me keep my head straight. --- doc/checksums.txt | 87 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 doc/checksums.txt diff --git a/doc/checksums.txt b/doc/checksums.txt new file mode 100644 index 00000000..e8b8ca85 --- /dev/null +++ b/doc/checksums.txt @@ -0,0 +1,87 @@ +database +-------- + +checksum hashes are represented as binary (BLOB) columns in the +database. 
They can be up to 64 bytes large for SHA512 or WHIRLPOOL. + +tracker protocol +---------------- + +Hex is used instead of binary over the wire, hex is already immune to +URL encoding in the tracker protocol. + +Hashes are represented with in the $NAME:$HEXDIGEST format: + + md5:68b329da9893e34099c7d8ad5cb9c940 + +verifying checksums +------------------- + +Ideally, mogstored checksum calculation is done by mogstored and only +the checksum (in $NAME=$HEXDIGEST format) is sent over the wire. + +If mogstored is not available, the checksum is calculated on the tracker +by streaming the file with HTTP GET. + +create_close (query worker) +--------------------------- + +New optional parameters: + +- checksumverify=(0|1) default: 0 (false) +- checksum=$NAME:$HEXDIGEST + +If "checksumverify" is "1" and "checksum" is present, "create_close" +will not return until it has verified the checksum. + +If the storage class of a file specifies a valid "checksumtype", +the checksum is saved to the "checksum" table in the database. + +The client is _never_ required to supply a checksum. + +The client may always supply a checksum (including checksumverify) +even if the storage class of the file does not required. + +replication +----------- + +The replication worker can calculate the checksum of the file while it +streams the source file (via GET) to the destination via PUT. + +If the client did not supply a checksum for create_close but the file +storage class requires a checksum, the replication worker will save the +calculated checksum after initial replication. + +If the checksum row for a file already exists before replication, +replication will verify the checksum it got (via GET) against the +checksum row. Replication fails if checksum verification fails +against the database row. + +Replication will also put a "Content-MD5" header in the PUT request if +using MD5 and the checksum is known before replication starts. 
The +storage server may (but is not required to) verify this and return an +HTTP error response if verification fails. + +If the file storage class requires a checksum: + the replication worker also verifies the checksum + on the destination after replication is complete + (either via mogstored or GET). + +fsck +---- + +If checksum row exists: + verifies all copies match + same behavior as size mismatches in existing code + +If checksum row is missing but class requires checksum: + if all copies of the file have the same checksum: + create the checksum row + if any device containing a copy down: + wait and revisit this FID later + if any of the copies differ: + log everything we do + "trust" checksum of oldest file (via Last-Modified from HEAD) + place "bad" fid in a per-device "lost+found" area + use MOVE if possible, GET+PUT+DELETE fallback + recreate the file with a new fidid using the oldest file From 5713152dc29fed4254e74f3aba67760c1058ca3b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 25 Nov 2011 04:50:45 +0000 Subject: [PATCH 177/405] checksum: add "from_string" and "save" function This can come in handy. 
--- lib/MogileFS/Checksum.pm | 35 ++++++++++++++++++++++++++++++++--- t/checksum.t | 11 ++++++++++- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/lib/MogileFS/Checksum.pm b/lib/MogileFS/Checksum.pm index 410fdd01..410edadf 100644 --- a/lib/MogileFS/Checksum.pm +++ b/lib/MogileFS/Checksum.pm @@ -3,11 +3,12 @@ use strict; use warnings; use overload '""' => \&as_string; -our %NAME2TYPE = ( - md5 => 1, +my %TYPE = ( + md5 => { type => 1, bytelen => 128 / 8 }, ); -our %TYPE2NAME = map { $NAME2TYPE{$_} => $_} keys(%NAME2TYPE); +our %NAME2TYPE = map { $_ => $TYPE{$_}->{type} } keys(%TYPE); +our %TYPE2NAME = map { $NAME2TYPE{$_} => $_ } keys(%NAME2TYPE); sub new { my ($class, $row) = @_; @@ -20,6 +21,27 @@ sub new { return $self; } +# $string = "md5:d41d8cd98f00b204e9800998ecf8427e" +sub from_string { + my ($class, $fidid, $string) = @_; + $string =~ /\A(\w+):([a-fA-F0-9]{32,128})\z/ or + die "invalid checksum string"; + my $checksumname = $1; + my $hexdigest = $2; + my $ref = $TYPE{$checksumname} or + die "invalid checksum name ($checksumname) from $string"; + my $checksum = pack("H*", $hexdigest); + my $len = length($checksum); + $len == $ref->{bytelen} or + die "invalid checksum length=$len (expected $ref->{bytelen})"; + + bless { + fidid => $fidid, + checksum => $checksum, + checksumtype => $NAME2TYPE{$checksumname}, + }, $class; +} + sub checksumname { my $self = shift; my $type = $self->{checksumtype}; @@ -28,6 +50,13 @@ sub checksumname { return $name; } +sub save { + my $self = shift; + my $sto = Mgd::get_store(); + + $sto->set_checksum($self->{fidid}, $self->{checksumtype}, $self->{checksum}); +} + sub hexdigest { my $self = shift; diff --git a/t/checksum.t b/t/checksum.t index f5ae65d0..20076681 100644 --- a/t/checksum.t +++ b/t/checksum.t @@ -12,7 +12,7 @@ use Digest::MD5 qw(md5 md5_hex); my $sto = eval { temp_store(); }; if ($sto) { - plan tests => 2; + plan tests => 6; } else { plan skip_all => "Can't create temporary test database: $@"; exit 
0; @@ -23,3 +23,12 @@ my $hash = $sto->get_checksum(6); my $csum = MogileFS::Checksum->new($hash); is(md5_hex("asdf"), $csum->hexdigest); is("md5", $csum->checksumname); + +my $zero = "md5:d41d8cd98f00b204e9800998ecf8427e"; +$csum = MogileFS::Checksum->from_string(6, $zero); +is("MogileFS::Checksum", ref($csum), "is a ref"); +is("d41d8cd98f00b204e9800998ecf8427e", $csum->hexdigest, "hex matches"); +is(1, $csum->save, "save successfully"); +$hash = $sto->get_checksum(6); +my $reloaded = MogileFS::Checksum->new($hash); +is("d41d8cd98f00b204e9800998ecf8427e", $reloaded->hexdigest, "hex matches"); From 893ba78034cf415fc520d6a5debe820e2b4bc46b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 25 Nov 2011 05:25:48 +0000 Subject: [PATCH 178/405] checksums: genericize to be algorithm-independent, add SHA* We'll use the "Digest" class in Perl as a guide for this. Only MD5 is officially supported. However, this *should* support SHA-(1|256|384|512) and it's easy to add more algorithms. --- doc/checksums.txt | 2 +- lib/MogileFS/Checksum.pm | 11 +++++-- lib/MogileFS/HTTPFile.pm | 49 ++++++++++++++++-------------- lib/Mogstored/SideChannelClient.pm | 29 +++++++++--------- t/40-httpfile.t | 10 +++--- t/checksum.t | 4 +-- 6 files changed, 57 insertions(+), 48 deletions(-) diff --git a/doc/checksums.txt b/doc/checksums.txt index e8b8ca85..41580546 100644 --- a/doc/checksums.txt +++ b/doc/checksums.txt @@ -12,7 +12,7 @@ URL encoding in the tracker protocol. 
Hashes are represented with in the $NAME:$HEXDIGEST format: - md5:68b329da9893e34099c7d8ad5cb9c940 + MD5:68b329da9893e34099c7d8ad5cb9c940 verifying checksums ------------------- diff --git a/lib/MogileFS/Checksum.pm b/lib/MogileFS/Checksum.pm index 410edadf..3c39baa6 100644 --- a/lib/MogileFS/Checksum.pm +++ b/lib/MogileFS/Checksum.pm @@ -4,7 +4,12 @@ use warnings; use overload '""' => \&as_string; my %TYPE = ( - md5 => { type => 1, bytelen => 128 / 8 }, + "MD5" => { type => 1, bytelen => 128 / 8 }, + "SHA-1" => { type => 2, bytelen => 160 / 8 }, + "SHA-224" => { type => 3, bytelen => 224 / 8 }, + "SHA-256" => { type => 4, bytelen => 256 / 8 }, + "SHA-384" => { type => 5, bytelen => 384 / 8 }, + "SHA-512" => { type => 6, bytelen => 512 / 8 }, ); our %NAME2TYPE = map { $_ => $TYPE{$_}->{type} } keys(%TYPE); @@ -21,10 +26,10 @@ sub new { return $self; } -# $string = "md5:d41d8cd98f00b204e9800998ecf8427e" +# $string = "MD5:d41d8cd98f00b204e9800998ecf8427e" sub from_string { my ($class, $fidid, $string) = @_; - $string =~ /\A(\w+):([a-fA-F0-9]{32,128})\z/ or + $string =~ /\A([\w-]+):([a-fA-F0-9]{32,128})\z/ or die "invalid checksum string"; my $checksumname = $1; my $hexdigest = $2; diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index 56ebeaee..6da498af 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -3,7 +3,7 @@ use strict; use warnings; use Carp qw(croak); use Socket qw(PF_INET IPPROTO_TCP SOCK_STREAM); -use Digest::MD5; +use Digest; use MogileFS::Server; use MogileFS::Util qw(error undeferr wait_for_readability wait_for_writeability); @@ -138,8 +138,8 @@ sub size { } } -sub md5_mgmt { - my ($self, $ping_cb) = @_; +sub digest_mgmt { + my ($self, $alg, $ping_cb) = @_; my $mogconn = $self->host->mogstored_conn; my $node_timeout = MogileFS->config("node_timeout"); my $sock; @@ -147,14 +147,14 @@ sub md5_mgmt { my $expiry; my $uri = $self->{uri}; - my $req = "md5 $uri\r\n"; + my $req = "$alg $uri\r\n"; my $reqlen = length $req; # 
a dead/stale socket may not be detected until we try to recv on it # after sending a request my $retries = 2; - # assuming the storage node can MD5 at >=2MB/s, low expectations here + # assuming the storage node can checksum at >=2MB/s, low expectations here my $response_timeout = $self->size / (2 * 1024 * 1024); my $flag_nosignal = MogileFS::Sys->flag_nosignal; @@ -167,8 +167,9 @@ retry: my $err = $!; $mogconn->mark_dead; if ($retries-- <= 0) { - $err = $err ? "send() error (md5 $uri): $err" : - "short send() (md5 $uri): $rv != $reqlen"; + $req =~ tr/\r\n//d; + $err = $err ? "send() error ($req): $err" : + "short send() ($req): $rv != $reqlen"; $err = $mogconn->{ip} . ":" . $mogconn->{port} . " $err"; return undeferr($err); } @@ -186,24 +187,26 @@ retry: $mogconn->mark_dead; return undeferr("EOF from mogstored") if ($retries-- <= 0); goto retry; - } elsif ($rv =~ /^\Q$uri\E md5=(\S+)\r\n/) { - my $md5 = $1; + } elsif ($rv =~ /^\Q$uri\E \Q$alg\E=([a-f0-9]{32,128})\r\n/) { + my $hexdigest = $1; - if ($md5 eq FILE_MISSING) { + if ($hexdigest eq FILE_MISSING) { # FIXME, this could be another error like EMFILE/ENFILE return FILE_MISSING; } - if (length($md5) == 32) { - return pack("H*", $md5); - } + my $checksum = eval { + MogileFS::Checksum->from_string(0, "$alg:$hexdigest") + }; + return undeferr("$alg failed for $uri: $@") if $@; + return $checksum->{checksum}; } elsif ($rv =~ /^ERROR /) { return; # old server, fallback to HTTP } - return undeferr("mogstored failed to handle (md5 $uri)"); + return undeferr("mogstored failed to handle ($alg $uri)"); } -sub md5_http { - my ($self, $ping_cb) = @_; +sub digest_http { + my ($self, $alg, $ping_cb) = @_; # don't SIGPIPE us (why don't we just globally ignore SIGPIPE?) 
my $flag_nosignal = MogileFS::Sys->flag_nosignal; @@ -213,7 +216,7 @@ sub md5_http { my $node_timeout = MogileFS->config("node_timeout"); # Hardcoded connection cache size of 20 :( $user_agent ||= LWP::UserAgent->new(timeout => $node_timeout, keep_alive => 20); - my $digest = Digest::MD5->new; + my $digest = Digest->new($alg); my %opts = ( # default (4K) is tiny, use 1M like replicate @@ -229,17 +232,17 @@ sub md5_http { return $digest->digest if $res->is_success; return FILE_MISSING if $res->code == 404; - return undeferr("Failed MD5 (GET) check for $path (" . $res->code . "): " + return undeferr("Failed $alg (GET) check for $path (" . $res->code . "): " . $res->message); } -sub md5 { - my ($self, $ping_cb) = @_; - my $md5 = $self->md5_mgmt($ping_cb); +sub digest { + my ($self, $alg, $ping_cb) = @_; + my $digest = $self->digest_mgmt($alg, $ping_cb); - return $md5 if ($md5 && $md5 ne FILE_MISSING); + return $digest if ($digest && $digest ne FILE_MISSING); - $self->md5_http($ping_cb); + $self->digest_http($alg, $ping_cb); } 1; diff --git a/lib/Mogstored/SideChannelClient.pm b/lib/Mogstored/SideChannelClient.pm index 6c41c61d..bf235cba 100644 --- a/lib/Mogstored/SideChannelClient.pm +++ b/lib/Mogstored/SideChannelClient.pm @@ -8,7 +8,7 @@ use fields ( 'read_buf', # unprocessed read buffer 'mogsvc', # the mogstored Perlbal::Service object ); -use Digest::MD5; +use Digest; use POSIX qw(O_RDONLY); # needed since we're pretending to be a Perlbal::Socket... 
never idle out @@ -68,11 +68,12 @@ sub read_buf_consume { } $self->watch_read(0); Mogstored->iostat_subscribe($self); - } elsif ($cmd =~ /^md5 (\S+)$/) { - my $uri = $self->validate_uri($1); + } elsif ($cmd =~ /^(MD5|SHA-(?:1|224|256|384|512)) (\S+)$/) { + my $alg = $1; + my $uri = $self->validate_uri($2); return unless defined($uri); - return $self->md5($path, $uri); + return $self->digest($alg, $path, $uri); } else { # we don't understand this so pass it on to manage command interface my @out; @@ -113,8 +114,8 @@ sub die_gracefully { Mogstored->on_sidechannel_die_gracefully; } -sub md5 { - my ($self, $path, $uri) = @_; +sub digest { + my ($self, $alg, $path, $uri) = @_; $self->watch_read(0); Perlbal::AIO::aio_open("$path$uri", O_RDONLY, 0, sub { @@ -125,19 +126,19 @@ sub md5 { return; } if ($fh) { - $self->md5_fh($fh, $uri); + $self->digest_fh($alg, $fh, $uri); } else { - $self->write("$uri md5=-1\r\n"); + $self->write("$uri $alg=-1\r\n"); $self->after_long_request; } }); } -sub md5_fh { - my ($self, $fh, $uri) = @_; +sub digest_fh { + my ($self, $alg, $fh, $uri) = @_; my $offset = 0; my $data = ''; - my $md5 = Digest::MD5->new; + my $digest = Digest->new($alg); my $cb; $cb = sub { @@ -145,13 +146,13 @@ sub md5_fh { if ($retval > 0) { my $bytes = length($data); $offset += $bytes; - $md5->add($data); + $digest->add($data); Perlbal::AIO::aio_read($fh, $offset, 0x100000, $data, $cb); } elsif ($retval == 0) { # EOF $cb = undef; CORE::close($fh); - $md5 = $md5->hexdigest; - $self->write("$uri md5=$md5\r\n"); + $digest = $digest->hexdigest; + $self->write("$uri $alg=$digest\r\n"); $self->after_long_request; } else { $cb = undef; diff --git a/t/40-httpfile.t b/t/40-httpfile.t index 481cba64..1fe68f25 100644 --- a/t/40-httpfile.t +++ b/t/40-httpfile.t @@ -100,14 +100,14 @@ MogileFS::Config->load_config; my $file = MogileFS::HTTPFile->at($paths[0]); my $md5_digest; -$md5_digest = $file->md5_mgmt(sub {}); +$md5_digest = $file->digest_mgmt("MD5", sub {}); ok($md5_digest eq 
md5("DATA"), "mgmt only"); my $cb_called = 0; -$md5_digest = $file->md5_http(sub { $cb_called++ }); +$md5_digest = $file->digest_http("MD5", sub { $cb_called++ }); ok(1 == $cb_called, "ping callback called"); ok($md5_digest eq md5("DATA"), "http only"); -$md5_digest = $file->md5(sub {}); +$md5_digest = $file->digest("MD5", sub {}); ok($md5_digest eq md5("DATA"), "mgmt or http"); ok(length($md5_digest) == 16, "MD5 is 16 bytes (128 bits)"); @@ -125,5 +125,5 @@ $expect = $expect->digest; @paths = $mogc->get_paths("largefile"); $file = MogileFS::HTTPFile->at($paths[0]); ok($size == $file->size, "big file size match $size"); -ok($file->md5_mgmt(sub {}) eq $expect, "md5_mgmt on big file"); -ok($file->md5_http(sub {}) eq $expect, "md5_http on big file"); +ok($file->digest_mgmt('MD5', sub {}) eq $expect, "digest_mgmt('MD5') on big file"); +ok($file->digest_http('MD5', sub {}) eq $expect, "digest_http('MD5') on big file"); diff --git a/t/checksum.t b/t/checksum.t index 20076681..fecc88a3 100644 --- a/t/checksum.t +++ b/t/checksum.t @@ -22,9 +22,9 @@ $sto->set_checksum(6, 1, md5("asdf")); my $hash = $sto->get_checksum(6); my $csum = MogileFS::Checksum->new($hash); is(md5_hex("asdf"), $csum->hexdigest); -is("md5", $csum->checksumname); +is("MD5", $csum->checksumname); -my $zero = "md5:d41d8cd98f00b204e9800998ecf8427e"; +my $zero = "MD5:d41d8cd98f00b204e9800998ecf8427e"; $csum = MogileFS::Checksum->from_string(6, $zero); is("MogileFS::Checksum", ref($csum), "is a ref"); is("d41d8cd98f00b204e9800998ecf8427e", $csum->hexdigest, "hex matches"); From 85e9beafb85450a5db1da5bbc66bd3336f4810d0 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 25 Nov 2011 10:02:12 +0000 Subject: [PATCH 179/405] wire up checksum to create_close/file_info/create_class commands We can now: * enable checksums for classes * save client-provided checksums to the database * verify them on create_close * read them in file_info --- lib/MogileFS/Checksum.pm | 17 ++++ lib/MogileFS/Worker/Query.pm | 49 
++++++++++- t/50-checksum.t | 159 +++++++++++++++++++++++++++++++++++ 3 files changed, 222 insertions(+), 3 deletions(-) create mode 100644 t/50-checksum.t diff --git a/lib/MogileFS/Checksum.pm b/lib/MogileFS/Checksum.pm index 3c39baa6..da016f28 100644 --- a/lib/MogileFS/Checksum.pm +++ b/lib/MogileFS/Checksum.pm @@ -62,6 +62,17 @@ sub save { $sto->set_checksum($self->{fidid}, $self->{checksumtype}, $self->{checksum}); } +sub maybe_save { + my ($self, $dmid, $classid) = @_; + my $class = eval { Mgd::class_factory()->get_by_id($dmid, $classid) }; + + # $class may be undef as it could've been deleted between + # create_open and create_close, we've never verified this before... + if ($class && $self->{checksumtype} eq $class->{checksumtype}) { + $self->save; + } +} + sub hexdigest { my $self = shift; @@ -76,4 +87,10 @@ sub as_string { "Checksum[f=$self->{fidid};$name=$hexdigest]" } +sub info { + my $self = shift; + + $self->checksumname . ':' . $self->hexdigest; +} + 1; diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 2f609ae9..40cb4e51 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -355,6 +355,12 @@ sub cmd_create_close { my $fidid = $args->{fid} or return $self->err_line("no_fid"); my $devid = $args->{devid} or return $self->err_line("no_devid"); my $path = $args->{path} or return $self->err_line("no_path"); + my $checksum = $args->{checksum}; + + if ($checksum) { + $checksum = eval { MogileFS::Checksum->from_string($fidid, $checksum) }; + return $self->err_line("invalid_checksum_format") if $@; + } my $fid = MogileFS::FID->new($fidid); my $dfid = MogileFS::DevFID->new($devid, $fid); @@ -391,7 +397,8 @@ sub cmd_create_close { } # get size of file and verify that it matches what we were given, if anything - my $size = MogileFS::HTTPFile->at($path)->size; + my $httpfile = MogileFS::HTTPFile->at($path); + my $size = $httpfile->size; # size check is optional? Needs to support zero byte files. 
$args->{size} = -1 unless $args->{size}; @@ -408,6 +415,20 @@ sub cmd_create_close { return $self->err_line("size_mismatch", "Expected: $args->{size}; actual: $size; path: $path") } + # checksum validation is optional as it can be very expensive + # However, we /always/ verify it if the client wants us to, even + # if the class does not enforce or store it. + if ($checksum && $args->{checksumverify}) { + my $alg = $checksum->checksumname; + my $actual = $httpfile->digest($alg); # expensive! + if ($actual ne $checksum->{checksum}) { + $failed->(); + $actual = "$alg:" . unpack("H*", $actual); + return $self->err_line("checksum_mismatch", + "Expected: $checksum; actual: $actual; path: $path"); + } + } + # see if we have a fid for this key already my $old_fid = MogileFS::FID->new_from_dmid_and_key($dmid, $key); if ($old_fid) { @@ -424,6 +445,8 @@ sub cmd_create_close { # insert file_on row $dfid->add_to_db; + $checksum->maybe_save($dmid, $trow->{classid}) if $checksum; + $sto->replace_into_file( fidid => $fidid, dmid => $dmid, @@ -600,8 +623,17 @@ sub cmd_file_info { my $ret = {}; $ret->{fid} = $fid->id; $ret->{domain} = Mgd::domain_factory()->get_by_id($fid->dmid)->name; - $ret->{class} = Mgd::class_factory()->get_by_id($fid->dmid, - $fid->classid)->name; + my $class = Mgd::class_factory()->get_by_id($fid->dmid, $fid->classid); + $ret->{class} = $class->name; + if ($class->{checksumtype}) { + my $checksum = Mgd::get_store()->get_checksum($fid->id); + if ($checksum) { + $checksum = MogileFS::Checksum->new($checksum); + $ret->{checksum} = $checksum->info; + } else { + $ret->{checksum} = "MISSING"; + } + } $ret->{key} = $key; $ret->{'length'} = $fid->length; $ret->{devcount} = $fid->devcount; @@ -840,6 +872,13 @@ sub cmd_create_class { return $self->err_line('invalid_replpolicy', $@) if $@; } + my $checksumtype = $args->{checksumtype}; + if ($checksumtype && $checksumtype ne 'NONE') { + my $tmp = $MogileFS::Checksum::NAME2TYPE{$checksumtype}; + return 
$self->err_line('invalid_checksumtype') unless $tmp; + $checksumtype = $tmp; + } + my $sto = Mgd::get_store(); my $dmid = $sto->get_domainid_by_name($domain) or return $self->err_line('domain_not_found'); @@ -866,6 +905,10 @@ sub cmd_create_class { # don't erase an existing replpolicy if we're not setting a new one. $sto->update_class_replpolicy(dmid => $dmid, classid => $clsid, replpolicy => $replpolicy) if $replpolicy; + if ($checksumtype) { + $sto->update_class_checksumtype(dmid => $dmid, classid => $clsid, + checksumtype => $checksumtype eq 'NONE' ? undef : $checksumtype); + } # return success return $self->cmd_clear_cache({ class => $class, mindevcount => $mindevcount, domain => $domain }); diff --git a/t/50-checksum.t b/t/50-checksum.t new file mode 100644 index 00000000..1946b633 --- /dev/null +++ b/t/50-checksum.t @@ -0,0 +1,159 @@ +# -*-perl-*- +use strict; +use warnings; +use Test::More; +use FindBin qw($Bin); +use Time::HiRes qw(sleep); +use MogileFS::Server; +use MogileFS::Test; +use HTTP::Request; +find_mogclient_or_skip(); + +my $sto = eval { temp_store(); }; +if ($sto) { + plan tests => 34; +} else { + plan skip_all => "Can't create temporary test database: $@"; + exit 0; +} + +use File::Temp; +my %mogroot; +$mogroot{1} = File::Temp::tempdir( CLEANUP => 1 ); +$mogroot{2} = File::Temp::tempdir( CLEANUP => 1 ); +my $dev2host = { 1 => 1, 2 => 2, }; +foreach (sort { $a <=> $b } keys %$dev2host) { + my $root = $mogroot{$dev2host->{$_}}; + mkdir("$root/dev$_") or die "Failed to create dev$_ dir: $!"; +} + +my $ms1 = create_mogstored("127.0.1.1", $mogroot{1}); +ok($ms1, "got mogstored1"); +my $ms2 = create_mogstored("127.0.1.2", $mogroot{2}); +ok($ms1, "got mogstored2"); + +while (! -e "$mogroot{1}/dev1/usage" && + ! 
-e "$mogroot{2}/dev2/usage") { + print "Waiting on usage...\n"; + sleep(.25); +} + +my $tmptrack = create_temp_tracker($sto); +ok($tmptrack); + +my $mogc = MogileFS::Client->new( + domain => "testdom", + hosts => [ "127.0.0.1:7001" ], + ); +my $be = $mogc->{backend}; # gross, reaching inside of MogileFS::Client + +# test some basic commands to backend +ok($tmptrack->mogadm("domain", "add", "testdom"), "created test domain"); +ok($tmptrack->mogadm("class", "add", "testdom", "2copies", "--mindevcount=2"), "created 2copies class in testdom"); +ok($tmptrack->mogadm("class", "add", "testdom", "1copy", "--mindevcount=1"), "created 1copy class in testdom"); + +ok($tmptrack->mogadm("host", "add", "hostA", "--ip=127.0.1.1", "--status=alive"), "created hostA"); +ok($tmptrack->mogadm("host", "add", "hostB", "--ip=127.0.1.2", "--status=alive"), "created hostB"); + +ok($tmptrack->mogadm("device", "add", "hostA", 1), "created dev1 on hostA"); +ok($tmptrack->mogadm("device", "add", "hostB", 2), "created dev2 on hostB"); + +sub wait_for_monitor { + my $be = shift; + my $was = $be->{timeout}; # can't use local on phash :( + $be->{timeout} = 10; + ok($be->do_request("clear_cache", {}), "waited for monitor") + or die "Failed to wait for monitor"; + ok($be->do_request("clear_cache", {}), "waited for monitor") + or die "Failed to wait for monitor"; + $be->{timeout} = $was; +} + +wait_for_monitor($be); + +my ($req, $rv, %opts); +my $ua = LWP::UserAgent->new; + +use Data::Dumper; +use Digest::MD5 qw/md5_hex/; + +# verify upload checksum +{ + my $key = "ok"; + %opts = ( domain => "testdom", class => "1copy", key => $key ); + $rv = $be->do_request("create_open", \%opts); + %opts = %$rv; + ok($rv && $rv->{path}, "create_open succeeded"); + $req = HTTP::Request->new(PUT => $rv->{path}); + $req->content("blah"); + $rv = $ua->request($req); + ok($rv->is_success, "PUT successful"); + $opts{key} = $key; + $opts{domain} = "testdom"; + $opts{checksum} = "MD5:".md5_hex('blah'); + 
$opts{checksumverify} = 1; + $rv = $be->do_request("create_close", \%opts); + ok($rv, "checksum verified successfully"); + is($sto->get_checksum($opts{fid}), undef, "checksum not saved"); + ok($mogc->file_info($key), "file_info($key) is sane"); +} + +# corrupted upload checksum fails +{ + my $key = 'corrupt'; + %opts = ( domain => "testdom", class => "1copy", key => $key ); + $rv = $be->do_request("create_open", \%opts); + %opts = %$rv; + ok($rv && $rv->{path}, "create_open succeeded"); + $req = HTTP::Request->new(PUT => $rv->{path}); + $req->content("blah"); + $rv = $ua->request($req); + ok($rv->is_success, "PUT successful"); + $opts{key} = $key; + $opts{domain} = "testdom"; + + $opts{checksumverify} = 1; + $opts{checksum} = "MD5:".md5_hex('fail'); + $rv = $be->do_request("create_close", \%opts); + ok(!defined($rv), "checksum verify noticed mismatch"); + my $hex = md5_hex('blah'); + is('checksum_mismatch', $be->{lasterr}, "error code is correct"); + ok($be->{lasterrstr} =~ /actual: MD5:$hex;/, "error message shows actual:"); + is($sto->get_checksum($opts{fid}), undef, "checksum not saved"); + is($mogc->file_info($key), undef, "$key not uploaded"); +} + +# enable saving MD5 checksums in "2copies" class +{ + %opts = ( domain => "testdom", class => "2copies", + checksumtype => "MD5", mindevcount => 2 ); + ok($be->do_request("update_class", \%opts), "update class"); + wait_for_monitor($be); +} + +# save new row to checksum table +{ + my $key = 'savecksum'; + %opts = ( domain => "testdom", class => "2copies", key => $key ); + $rv = $be->do_request("create_open", \%opts); + %opts = %$rv; + ok($rv && $rv->{path}, "create_open succeeded"); + $req = HTTP::Request->new(PUT => $rv->{path}); + $req->content("blah"); + $rv = $ua->request($req); + ok($rv->is_success, "PUT successful"); + $opts{key} = $key; + $opts{domain} = "testdom"; + $opts{checksum} = "MD5:".md5_hex('blah'); + $opts{checksumverify} = 1; + $rv = $be->do_request("create_close", \%opts); + ok($rv, "checksum 
verified successfully"); + my $row = $sto->get_checksum($opts{fid}); + ok($row, "checksum saved"); + my $info = $mogc->file_info($key); + ok($info, "file_info($key) is sane"); + is($info->{checksum}, "MD5:".md5_hex('blah'), 'checksum shows up'); + $sto->delete_checksum($info->{fid}); + $info = $mogc->file_info($key); + is($info->{checksum}, "MISSING", 'checksum is MISSING after delete'); +} From 7ecf77da522dbdf7bcd8304e9a6a4728824e87e4 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 25 Nov 2011 20:47:13 +0000 Subject: [PATCH 180/405] test for update_class with checksumtype=NONE we need to be able to both enable and disable checksuming for a class --- t/50-checksum.t | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/t/50-checksum.t b/t/50-checksum.t index 1946b633..a628bff9 100644 --- a/t/50-checksum.t +++ b/t/50-checksum.t @@ -11,7 +11,7 @@ find_mogclient_or_skip(); my $sto = eval { temp_store(); }; if ($sto) { - plan tests => 34; + plan tests => 40; } else { plan skip_all => "Can't create temporary test database: $@"; exit 0; @@ -157,3 +157,23 @@ use Digest::MD5 qw/md5_hex/; $info = $mogc->file_info($key); is($info->{checksum}, "MISSING", 'checksum is MISSING after delete'); } + +{ + my @classes; + %opts = ( domain => "testdom", class => "1copy", mindevcount => 1 ); + + $opts{checksumtype} = "NONE"; + ok($be->do_request("update_class", \%opts), "update class"); + @classes = grep { $_->{classname} eq '1copy' } $sto->get_all_classes; + is($classes[0]->{checksumtype}, undef, "checksumtype unset"); + + $opts{checksumtype} = "MD5"; + ok($be->do_request("update_class", \%opts), "update class"); + @classes = grep { $_->{classname} eq '1copy' } $sto->get_all_classes; + is($classes[0]->{checksumtype}, 1, "checksumtype is 1 (MD5)"); + + $opts{checksumtype} = "NONE"; + ok($be->do_request("update_class", \%opts), "update class"); + @classes = grep { $_->{classname} eq '1copy' } $sto->get_all_classes; + is($classes[0]->{checksumtype}, 
undef, "checksumtype unset"); +} From 0323554f55211d5d37857176ec9729b8f3d95ab4 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 25 Nov 2011 21:10:58 +0000 Subject: [PATCH 181/405] add MogileFS::FID->checksum function This returns undef if a checksum is missing for a class, and a MogileFS::Checksum object if it exists. --- lib/MogileFS/FID.pm | 8 ++++++++ t/checksum.t | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/FID.pm b/lib/MogileFS/FID.pm index 5ad6a414..5e32d32e 100644 --- a/lib/MogileFS/FID.pm +++ b/lib/MogileFS/FID.pm @@ -277,6 +277,14 @@ sub forget_about_device { return 1; } +# return an FID's checksum object, undef if it's missing +sub checksum { + my $self = shift; + my $row = Mgd::get_store()->get_checksum($self->{fidid}) or return undef; + + MogileFS::Checksum->new($row); +} + 1; __END__ diff --git a/t/checksum.t b/t/checksum.t index fecc88a3..634b4985 100644 --- a/t/checksum.t +++ b/t/checksum.t @@ -12,7 +12,7 @@ use Digest::MD5 qw(md5 md5_hex); my $sto = eval { temp_store(); }; if ($sto) { - plan tests => 6; + plan tests => 7; } else { plan skip_all => "Can't create temporary test database: $@"; exit 0; @@ -32,3 +32,5 @@ is(1, $csum->save, "save successfully"); $hash = $sto->get_checksum(6); my $reloaded = MogileFS::Checksum->new($hash); is("d41d8cd98f00b204e9800998ecf8427e", $reloaded->hexdigest, "hex matches"); +my $fid_checksum = MogileFS::FID->new(6)->checksum; +is_deeply($fid_checksum, $csum, "MogileFS::FID->checksum works"); From fdab72f63ddfac7c16b30fb46cbd642da16038bc Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 26 Nov 2011 09:38:06 +0000 Subject: [PATCH 182/405] add checksum generation/verification to replication worker replication now lazily generates checksums if they're not provided by the client (but required by the storage class). replication may also verify checksums if they're available in the database.
replication now sets the Content-MD5 header on PUT requests, in case the remote server is capable of rejecting corrupt transfers based on it replication attempts to verify the checksum of the freshly PUT-ed file. TODO: monitor will attempt "test-write" with mangled Content-MD5 to determine if storage backends are Content-MD5-capable so replication can avoid reading checksum on destination --- lib/MogileFS/Worker/Replicate.pm | 43 +++++++++++++++++++++++++++++--- t/50-checksum.t | 41 ++++++++++++++++++++++++++++-- 2 files changed, 79 insertions(+), 5 deletions(-) diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index 6035ad0b..7dbd712f 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -12,6 +12,8 @@ use MogileFS::Server; use MogileFS::Util qw(error every debug); use MogileFS::Config; use MogileFS::ReplicationRequest qw(rr_upgrade); +use Digest; +use MIME::Base64 qw(encode_base64); # setup the value used in a 'nexttry' field to indicate that this item will never # actually be tried again and require some sort of manual intervention. 
@@ -302,7 +304,6 @@ sub replicate { my $errref = delete $opts{'errref'}; my $no_unlock = delete $opts{'no_unlock'}; - my $digest = delete $opts{'digest'}; my $fixed_source = delete $opts{'source_devid'}; my $mask_devids = delete $opts{'mask_devids'} || {}; my $avoid_devids = delete $opts{'avoid_devids'} || {}; @@ -474,6 +475,7 @@ sub replicate { } my $worker = MogileFS::ProcManager->is_child or die; + my $digest = Digest->new($cls->checksumname) if $cls->checksumtype; my $rv = http_copy( sdevid => $sdevid, ddevid => $ddevid, @@ -503,6 +505,9 @@ sub replicate { my $dfid = MogileFS::DevFID->new($ddevid, $fid); $dfid->add_to_db; + if ($digest && !$fid->checksum) { + $sto->set_checksum($fidid, $cls->checksumtype, $digest->digest); + } push @on_devs, $devs->{$ddevid}; push @on_devs_tellpol, $devs->{$ddevid}; @@ -538,6 +543,15 @@ sub http_copy { ); die if %opts; + my $content_md5 = ''; + my $fid_checksum = $rfid->checksum; + if ($fid_checksum && $fid_checksum->checksumname eq "MD5") { + # some HTTP servers may be able to verify Content-MD5 on PUT + # and reject corrupted requests. 
no HTTP server should reject + # a request for an unrecognized header + my $b64digest = encode_base64($fid_checksum->{checksum}, ""); + $content_md5 = "Content-MD5: $b64digest\r\n"; + } $intercopy_cb ||= sub {}; @@ -629,7 +643,7 @@ sub http_copy { # open target for put my $dsock = IO::Socket::INET->new(PeerAddr => $dhostip, PeerPort => $dport, Timeout => 2) or return $dest_error->("Unable to create dest socket to $dhostip:$dport for $dpath"); - $dsock->write("PUT $dpath HTTP/1.0\r\nContent-length: $clen\r\n\r\n") + $dsock->write("PUT $dpath HTTP/1.0\r\nContent-length: $clen$content_md5\r\n\r\n") or return $dest_error->("Unable to write data to $dpath on $dhostip:$dport"); return $dest_error->("Pipe closed during write to $dpath on $dhostip:$dport") if $pipe_closed; @@ -674,10 +688,33 @@ sub http_copy { return $dest_error->("closed pipe writing to destination") if $pipe_closed; return $src_error->("error reading midway through source: $!") unless $finished_read; + # callee will want this digest, too, so clone as "digest" is destructive + $digest = $digest->clone->digest if $digest; + + if ($fid_checksum) { + if ($digest ne $fid_checksum->{checksum}) { + my $expect = $fid_checksum->hexdigest; + $digest = unpack("H*", $digest); + return $src_error->("checksum mismatch on GET: expected: $expect actual: $digest"); + } + } + # now read in the response line (should be first line) my $line = <$dsock>; if ($line =~ m!^HTTP/\d+\.\d+\s+(\d+)!) 
{ - return 1 if $1 >= 200 && $1 <= 299; + if ($1 >= 200 && $1 <= 299) { + if ($digest) { + my $alg = $rfid->class->checksumname; + my $durl = "http://$dhostip:$dport$dpath"; + my $actual = MogileFS::HTTPFile->at($durl)->digest($alg); + if ($actual ne $digest) { + my $expect = unpack("H*", $digest); + $actual = unpack("H*", $actual); + return $dest_error->("checksum mismatch on PUT, expected: $expect actual: $digest"); + } + } + return 1; + } return $dest_error->("Got HTTP status code $1 PUTing to http://$dhostip:$dport$dpath"); } else { return $dest_error->("Error: HTTP response line not recognized writing to http://$dhostip:$dport$dpath: $line"); diff --git a/t/50-checksum.t b/t/50-checksum.t index a628bff9..4b1fc678 100644 --- a/t/50-checksum.t +++ b/t/50-checksum.t @@ -11,7 +11,7 @@ find_mogclient_or_skip(); my $sto = eval { temp_store(); }; if ($sto) { - plan tests => 40; + plan tests => 47; } else { plan skip_all => "Can't create temporary test database: $@"; exit 0; @@ -41,6 +41,8 @@ while (! 
-e "$mogroot{1}/dev1/usage" && my $tmptrack = create_temp_tracker($sto); ok($tmptrack); +my $admin = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001'); +$admin or die "failed to create admin socket: $!"; my $mogc = MogileFS::Client->new( domain => "testdom", hosts => [ "127.0.0.1:7001" ], @@ -71,7 +73,7 @@ sub wait_for_monitor { wait_for_monitor($be); -my ($req, $rv, %opts); +my ($req, $rv, %opts, @paths); my $ua = LWP::UserAgent->new; use Data::Dumper; @@ -177,3 +179,38 @@ use Digest::MD5 qw/md5_hex/; @classes = grep { $_->{classname} eq '1copy' } $sto->get_all_classes; is($classes[0]->{checksumtype}, undef, "checksumtype unset"); } + +# wait for replicate to verify existing (valid) checksum +{ + my $key = 'savecksum'; + + do { + @paths = $mogc->get_paths($key); + } while (scalar(@paths) == 1 and sleep(0.1)); + is(scalar(@paths), 2, "replicate successfully with good checksum"); +} + +# save checksum on replicate, client didn't care to provide one +{ + my $key = 'lazycksum'; + + syswrite($admin, "!want 0 replicate\n"); # disable replication + ok(<$admin> =~ /Now desiring/ && <$admin> eq ".\r\n", "disabled replicate"); + + my $fh = $mogc->new_file($key, "2copies"); + print $fh "lazy"; + ok(close($fh), "closed file"); + my $info = $mogc->file_info($key); + is($info->{checksum}, 'MISSING', 'checksum is MISSING'); + + syswrite($admin, "!want 1 replicate\n"); # disable replication + ok(<$admin> =~ /Now desiring/ && <$admin> eq ".\r\n", "enabled replicate"); + + do { + @paths = $mogc->get_paths($key); + } while (scalar(@paths) == 1 && sleep(0.1)); + is(scalar(@paths), 2, "replicate successfully with good checksum"); + + $info = $mogc->file_info($key); + is($info->{checksum}, "MD5:".md5_hex("lazy"), 'checksum is set after repl'); +} From bd624ed8d13fecf0dc2381d73118ba9a27a6774f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 27 Nov 2011 09:03:27 +0000 Subject: [PATCH 183/405] monitor observes Content-MD5-rejectability This functionality (and a server capable of 
rejecting bad MD5s) will allow us to skip an expensive MogileFS::HTTPFile->digest request at replication time. Also testing with the following patch to Perlbal: --- a/lib/mogdeps/Perlbal/ClientHTTP.pm +++ b/lib/mogdeps/Perlbal/ClientHTTP.pm @@ -22,6 +22,7 @@ use fields ('put_in_progress', # 1 when we're currently waiting for an async job 'content_length', # length of document being transferred 'content_length_remain', # bytes remaining to be read 'chunked_upload_state', # bool/obj: if processing a chunked upload, Perlbal::ChunkedUploadState object, else undef + 'md5_ctx', ); use HTTP::Date (); @@ -29,6 +30,7 @@ use File::Path; use Errno qw( EPIPE ); use POSIX qw( O_CREAT O_TRUNC O_WRONLY O_RDONLY ENOENT ); +use Digest::MD5; # class list of directories we know exist our (%VerifiedDirs); @@ -61,6 +63,7 @@ sub init { $self->{put_fh} = undef; $self->{put_pos} = 0; $self->{chunked_upload_state} = undef; + $self->{md5_ctx} = undef; } sub close { @@ -134,6 +137,8 @@ sub handle_put { return $self->send_response(403) unless $self->{service}->{enable_put}; + $self->{md5_ctx} = $hd->header('Content-MD5') ? 
Digest::MD5->new : undef; + return if $self->handle_put_chunked; # they want to put something, so let's setup and wait for more reads @@ -421,6 +426,8 @@ sub put_writeout { my $data = join("", map { $$_ } @{$self->{read_buf}}); my $count = length $data; + my $md5_ctx = $self->{md5_ctx}; + $md5_ctx->add($data) if $md5_ctx; # reset our input buffer $self->{read_buf} = []; @@ -460,6 +467,17 @@ sub put_close { if (CORE::close($self->{put_fh})) { $self->{put_fh} = undef; + + my $md5_ctx = $self->{md5_ctx}; + if ($md5_ctx) { + my $actual = $md5_ctx->b64digest; + my $expect = $self->{req_headers}->header("Content-MD5"); + $expect =~ s/=+\s*\z//; + if ($actual ne $expect) { + return $self->send_response(400, + "Content-MD5 mismatch, expected: $expect actual: $actual"); + } + } return $self->send_response(200); } else { return $self->system_error("Error saving file", "error in close: $!"); --- lib/MogileFS/Device.pm | 2 +- lib/MogileFS/Worker/Monitor.pm | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Device.pm b/lib/MogileFS/Device.pm index 2740d68b..1d96beef 100644 --- a/lib/MogileFS/Device.pm +++ b/lib/MogileFS/Device.pm @@ -17,7 +17,7 @@ BEGIN { eval "sub TESTING () { $testing }"; } -my @observed_fields = qw/observed_state utilization/; +my @observed_fields = qw/observed_state utilization reject_bad_md5/; my @fields = (qw/hostid status weight mb_total mb_used mb_asof devid/, @observed_fields); diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index a5447183..e36e7f96 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -20,6 +20,7 @@ use MogileFS::Config; use MogileFS::Util qw(error debug encode_url_args); use MogileFS::IOStatWatcher; use MogileFS::Server; +use Digest::MD5 qw(md5_base64); use constant UPDATE_DB_EVERY => 15; @@ -425,6 +426,7 @@ sub check_device { # if success and the content matches, mark it writeable if ($testwrite->is_success && 
$testwrite->content eq $content) { + $self->check_bogus_md5($dev); $self->state_event('device', $devid, {observed_state => 'writeable'}) if (!$dev->observed_writeable); debug("dev$devid: used = $used, total = $total, writeable = 1"); @@ -439,6 +441,26 @@ sub check_device { debug("dev$devid: used = $used, total = $total, writeable = 0"); } +sub check_bogus_md5 { + my ($self, $dev) = @_; + my $host = $dev->host; + my $hostip = $host->ip; + my $port = $host->http_port; + my $devid = $dev->id; + my $puturl = "http://$hostip:$port/dev$devid/test-write/test-md5"; + my $req = HTTP::Request->new(PUT => $puturl); + $req->header("Content-MD5", md5_base64("!") . "=="); + $req->content("."); + + # success is bad here, it means the server doesn't understand how to + # verify and reject corrupt bodies from Content-MD5 headers. + # most servers /will/ succeed here :< + my $resp = $self->ua->request($req); + my $rej = $resp->is_success ? 0 : 1; + debug("dev$devid: reject_bad_md5 = $rej"); + $self->state_event('device', $devid, { reject_bad_md5 => $rej }); +} + 1; # Local Variables: From 55b29e540572ed38ade913d3710119e30e6a3d75 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 27 Nov 2011 09:17:26 +0000 Subject: [PATCH 184/405] replication skips HTTPFile->digest if device can reject bad MD5s Rereading a large file is expensive. If we can monitor and observe our storage nodes for MD5 rejectionability, we can rely on that instead of having to have anybody reread the entire file to calculate its MD5. 
--- lib/MogileFS/Worker/Replicate.pm | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index 7dbd712f..46c4a371 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -705,6 +705,12 @@ sub http_copy { if ($1 >= 200 && $1 <= 299) { if ($digest) { my $alg = $rfid->class->checksumname; + + if ($ddev->{reject_bad_md5} && ($alg eq "MD5")) { + # dest device would've rejected us with a error, + # no need to reread the file + return 1; + } my $durl = "http://$dhostip:$dport$dpath"; my $actual = MogileFS::HTTPFile->at($durl)->digest($alg); if ($actual ne $digest) { From 7b417cdeb39e013ce0c9e63bae0c324240cfb9a3 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 27 Nov 2011 09:56:15 +0000 Subject: [PATCH 185/405] doc: update checksums document Only the fsck part remains to be implemented... And I've never studied/used fsck much :x --- doc/checksums.txt | 25 +++++++++++++++++++------ lib/MogileFS/Worker/Fsck.pm | 1 + 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/doc/checksums.txt b/doc/checksums.txt index 41580546..6e09f6ff 100644 --- a/doc/checksums.txt +++ b/doc/checksums.txt @@ -14,8 +14,8 @@ Hashes are represented with in the $NAME:$HEXDIGEST format: MD5:68b329da9893e34099c7d8ad5cb9c940 -verifying checksums -------------------- +verifying checksums (on disk) +----------------------------- Ideally, mogstored checksum calculation is done by mogstored and only the checksum (in $NAME=$HEXDIGEST format) is sent over the wire. @@ -42,6 +42,14 @@ The client is _never_ required to supply a checksum. The client may always supply a checksum (including checksumverify) even if the storage class of the file does not required. +monitor +------- + +Will also attempt to check if the storage server can reject PUTs +with mismatched Content-MD5 headers. 
Replication will use this +info to avoid calling HTTPFile->digest post-replication to verify +the upload completed successfully. + replication ----------- @@ -60,12 +68,17 @@ against the database row. Replication will also put a "Content-MD5" header in the PUT request if using MD5 and the checksum is known before replication starts. The storage server may (but is not required to) verify this and return an -HTTP error response if verification fails. +HTTP error response if verification fails. The monitor change will +allow us to know if a destination device is capable of rejecting +a request based on a Content-MD5 mismatch. If the file storage class requires a checksum: - the replication worker also verifies the checksum - on the destination after replication is complete - (either via mogstored or GET). + + If the destination device can NOT reject based on (bad) MD5: + + Replication worker also verifies the checksum + on the destination after replication is complete + (either via mogstored or GET). fsck ---- diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index f070a363..0b68125c 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -25,6 +25,7 @@ use constant EV_START_SEARCH => "SRCH"; use constant EV_FOUND_FID => "FOND"; use constant EV_RE_REPLICATE => "REPL"; use constant EV_BAD_COUNT => "BCNT"; +use constant EV_BAD_CHECKSUM => "BSUM"; use POSIX (); From 3bc57a87e171ce799badfb0aa5effa30d02906d1 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 29 Nov 2011 01:19:39 +0000 Subject: [PATCH 186/405] ensure checksum row is deleted when FID is deleted Stale rows are bad. 
--- lib/MogileFS/Store.pm | 2 ++ lib/MogileFS/Store/Postgres.pm | 2 ++ 2 files changed, 4 insertions(+) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 75978350..9cb74354 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1241,6 +1241,8 @@ sub delete_class { sub delete_fidid { my ($self, $fidid) = @_; + eval { $self->delete_checksum($fidid); }; + $self->condthrow; eval { $self->dbh->do("DELETE FROM file WHERE fid=?", undef, $fidid); }; $self->condthrow; eval { $self->dbh->do("DELETE FROM tempfile WHERE fid=?", undef, $fidid); }; diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index 28c729cc..244c12a3 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -742,6 +742,8 @@ sub mark_fidid_unreachable { sub delete_fidid { my ($self, $fidid) = @_; + $self->delete_checksum($fidid); + $self->condthrow; $self->dbh->do("DELETE FROM file WHERE fid=?", undef, $fidid); $self->condthrow; $self->dbh->do("DELETE FROM tempfile WHERE fid=?", undef, $fidid); From 6ea29a47a667a984ce7ef64641eb4db4345d027c Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 29 Nov 2011 01:33:07 +0000 Subject: [PATCH 187/405] replicate generates proper CRLF for Content-MD5 header TODO: see if we can use LWP to avoid mistakes like this :x --- lib/MogileFS/Worker/Replicate.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index 46c4a371..e5148238 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -550,7 +550,7 @@ sub http_copy { # and reject corrupted requests. 
no HTTP server should reject # a request for an unrecognized header my $b64digest = encode_base64($fid_checksum->{checksum}, ""); - $content_md5 = "Content-MD5: $b64digest\r\n"; + $content_md5 = "\r\nContent-MD5: $b64digest"; } $intercopy_cb ||= sub {}; From de77caf0150eaf7ee903ddd0ac3a3e4adcf4c599 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 29 Nov 2011 02:08:01 +0000 Subject: [PATCH 188/405] flesh out fsck functionality for checksums Fsck behavior is based on existing behavior for size mismatches. size failures take precedence, since it's much cheaper to verify size match/mismatches than checksum mismatches. Checksum calculations are expensive, and fsck is already parallel, so we do not parallelize checksum calculations on a per-FID basis. --- doc/checksums.txt | 12 +-- lib/MogileFS/DevFID.pm | 11 +++ lib/MogileFS/Worker/Fsck.pm | 103 ++++++++++++++++++++++- t/50-checksum.t | 157 +++++++++++++++++++++++++++++++++++- 4 files changed, 274 insertions(+), 9 deletions(-) diff --git a/doc/checksums.txt b/doc/checksums.txt index 6e09f6ff..ce59921c 100644 --- a/doc/checksums.txt +++ b/doc/checksums.txt @@ -85,7 +85,11 @@ fsck If checksum row exists: verifies all copies match - same behavior as size mismatches in existing code + same behavior as size mismatches in existing code: + if good copies exist + delete bad ones and rereplicate + else if only bad copies exist + log error If checksum row is missing but class requires checksum: if all copies of the file have the same checksum: @@ -93,8 +97,4 @@ If checksum row is missing but class requires checksum: if any device containing a copy down: wait and revisit this FID later if any of the copies differ: - log everything we do - "trust" checksum of oldest file (via Last-Modified from HEAD) - place "bad" fid in a per-device "lost+found" area - use MOVE if possible, GET+PUT+DELETE fallback - recreate the file with a new fidid using the oldest file + log failure and all (hex) checksums + devids diff --git 
a/lib/MogileFS/DevFID.pm b/lib/MogileFS/DevFID.pm index 7e0d5d49..3b6ff94c 100644 --- a/lib/MogileFS/DevFID.pm +++ b/lib/MogileFS/DevFID.pm @@ -68,6 +68,17 @@ sub size_on_disk { return MogileFS::HTTPFile->at($url)->size; } +# returns -1 on missing, +# undef on connectivity error, +# else checksum of file on disk (after HTTP GET or mogstored read) +sub checksum_on_disk { + my ($self, $alg, $ping_cb) = @_; + my $url = $self->get_url; + + # check that it has size (>0) and is reachable (not undef) + return MogileFS::HTTPFile->at($url)->digest($alg, $ping_cb); +} + # returns true if size seen matches fid's length sub size_matches { my $self = shift; diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index 0b68125c..e9eacf42 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -26,6 +26,7 @@ use constant EV_FOUND_FID => "FOND"; use constant EV_RE_REPLICATE => "REPL"; use constant EV_BAD_COUNT => "BCNT"; use constant EV_BAD_CHECKSUM => "BSUM"; +use constant EV_NO_CHECKSUM => "NSUM"; use POSIX (); @@ -137,6 +138,11 @@ sub check_fid { return $fix->(); } + # missing checksum row + if ($fid->class->checksumtype && ! $fid->checksum) { + return $fix->(); + } + # in the fast case, do nothing else (don't check if assumed file # locations are actually there). in the fast case, all we do is # check the replication policy, which is already done, so finish. @@ -166,7 +172,7 @@ sub check_fid { }); if ($rv) { - return HANDLED; + return $fid->class->checksumtype ? $fix->() : HANDLED; } elsif ($err eq "stalled") { return STALLED; } elsif ($err eq "needfix") { @@ -209,6 +215,9 @@ sub fix_fid { my @good_devs; my @bad_devs; my %already_checked; # devid -> 1. + my $alg = $fid->class->checksumname; + my $checksums = {}; + my $ping_cb = sub { $self->still_alive }; my $check_dfids = sub { my $is_desperate_mode = shift; @@ -228,6 +237,23 @@ sub fix_fid { die "dev " . $dev->id . 
" unreachable" unless defined $disk_size; if ($disk_size == $fid->length) { + if ($alg) { + my $digest = $self->checksum_on_disk($dfid, $alg, $ping_cb); + unless (defined $digest) { + die "dev " . $dev->id . " unreachable"; + } + + # DELETE could've hit right after size check + if ($digest eq "-1") { + unless ($is_desperate_mode) { + $fid->fsck_log(EV_FILE_MISSING, $dev); + } + push @bad_devs, $dfid->device; + next; + } + push @{$checksums->{$digest} ||= []}, $dfid->device; + } + push @good_devs, $dfid->device; # if we were doing a desperate search, one is enough, we can stop now! return if $is_desperate_mode; @@ -283,6 +309,8 @@ sub fix_fid { # in case the devcount or similar was fixed. $fid->want_reload; + $self->fix_checksums($fid, $checksums) if $alg; + # Note: this will reload devids, if they called 'note_on_device' # or 'forget_about_device' unless ($fid->devids_meet_policy) { @@ -300,6 +328,16 @@ sub fix_fid { return HANDLED; } +sub forget_file_on_with_bad_checksums { + my ($self, $fid, $checksums) = @_; + foreach my $bdevs (values %$checksums) { + foreach my $bdev (@$bdevs) { + error("removing file_on mapping for fid=" . $fid->id . ", dev=" . 
$bdev->id); + $fid->forget_about_device($bdev); + } + } +} + # returns 0 on missing, # undef on connectivity error, # else size of file on disk (after HTTP HEAD or mogstored stat) @@ -309,6 +347,69 @@ sub size_on_disk { return $dfid->size_on_disk; } +# returns -1 on missing, +# undef on connectivity error, +# else checksum of file on disk (after HTTP GET or mogstored read) +sub checksum_on_disk { + my ($self, $dfid, $alg, $ping_cb) = @_; + return undef if $dfid->device->dstate->is_perm_dead; + return $dfid->checksum_on_disk($alg, $ping_cb); +} + +sub all_checksums_bad { + my ($self, $fid, $checksums) = @_; + my $alg = $fid->class->checksumname or return; # class could've changed + my $cur_checksum = $fid->checksum; + my @err; + + foreach my $checksum (keys %$checksums) { + my $bdevs = join(",", map { $_->id } @{$checksums->{$checksum}}); + $checksum = unpack("H*", $checksum); + push @err, "$alg:$checksum on devids=[$bdevs]" + } + my $err = join('; ', @err); + my $cur = $cur_checksum ? "Expected: $cur_checksum" + : "No known valid checksum"; + error("all checksums bad: $err. $cur"); + $fid->fsck_log(EV_BAD_CHECKSUM); +} + +sub fix_checksums { + my ($self, $fid, $checksums) = @_; + my $cur_checksum = $fid->checksum; + my @all_checksums = keys(%$checksums); + + if (scalar(@all_checksums) == 1) { # all checksums match, good! 
+ my $disk_checksum = $all_checksums[0]; + if ($cur_checksum) { + if ($cur_checksum->{checksum} ne $disk_checksum) { + $fid->fsck_log(EV_BAD_CHECKSUM); + } + } else { # fresh row to checksum + my $checksumtype = $fid->class->checksumtype or return; + my %row = ( + fid => $fid->id, + checksum => $disk_checksum, + checksumtype => $checksumtype, + ); + my $new_checksum = MogileFS::Checksum->new(\%row); + debug("creating new checksum=$new_checksum"); + $fid->fsck_log(EV_NO_CHECKSUM); + $new_checksum->save; + } + } elsif ($cur_checksum) { + my $good = delete($checksums->{$cur_checksum->{checksum}}); + if ($good && (scalar(@$good) > 0)) { + $self->forget_file_on_with_bad_checksums($fid, $checksums); + # will fail $fid->devids_meet_policy and re-replicate + } else { + $self->all_checksums_bad($fid, $checksums); + } + } else { + $self->all_checksums_bad($fid, $checksums); + } +} + 1; # Local Variables: diff --git a/t/50-checksum.t b/t/50-checksum.t index 4b1fc678..3216046b 100644 --- a/t/50-checksum.t +++ b/t/50-checksum.t @@ -11,7 +11,7 @@ find_mogclient_or_skip(); my $sto = eval { temp_store(); }; if ($sto) { - plan tests => 47; + plan tests => 99; } else { plan skip_all => "Can't create temporary test database: $@"; exit 0; @@ -71,9 +71,17 @@ sub wait_for_monitor { $be->{timeout} = $was; } +sub full_fsck { + my $tmptrack = shift; + + ok($tmptrack->mogadm("fsck", "clearlog"), "clear fsck log"); + ok($tmptrack->mogadm("fsck", "reset"), "reset fsck"); + ok($tmptrack->mogadm("fsck", "start"), "started fsck"); +} + wait_for_monitor($be); -my ($req, $rv, %opts, @paths); +my ($req, $rv, %opts, @paths, @fsck_log); my $ua = LWP::UserAgent->new; use Data::Dumper; @@ -160,6 +168,7 @@ use Digest::MD5 qw/md5_hex/; is($info->{checksum}, "MISSING", 'checksum is MISSING after delete'); } +# flip checksum classes around { my @classes; %opts = ( domain => "testdom", class => "1copy", mindevcount => 1 ); @@ -214,3 +223,147 @@ use Digest::MD5 qw/md5_hex/; $info = $mogc->file_info($key); 
is($info->{checksum}, "MD5:".md5_hex("lazy"), 'checksum is set after repl'); } + +# fsck recreates checksum when missing +{ + my $key = 'lazycksum'; + my $info = $mogc->file_info($key); + $sto->delete_checksum($info->{fid}); + $info = $mogc->file_info($key); + is($info->{checksum}, "MISSING", "checksum is missing"); + full_fsck($tmptrack); + + do { + $info = $mogc->file_info($key); + } while ($info->{checksum} eq "MISSING" && sleep(0.1)); + is($info->{checksum}, "MD5:".md5_hex("lazy"), 'checksum is set after fsck'); + + @fsck_log = $sto->fsck_log_rows; + is(scalar(@fsck_log), 1, "fsck log has one row"); + is($fsck_log[0]->{fid}, $info->{fid}, "fid matches in fsck log"); + is($fsck_log[0]->{evcode}, "NSUM", "missing checksum logged"); +} + +# fsck fixes a file corrupt file +{ + my $key = 'lazycksum'; + my $info = $mogc->file_info($key); + @paths = $mogc->get_paths($key); + is(scalar(@paths), 2, "2 paths for lazycksum"); + $req = HTTP::Request->new(PUT => $paths[0]); + $req->content("LAZY"); + $rv = $ua->request($req); + ok($rv->is_success, "upload to corrupt a file successful"); + is($ua->get($paths[0])->content, "LAZY", "file successfully corrupted"); + is($ua->get($paths[1])->content, "lazy", "paths[1] not corrupted"); + + full_fsck($tmptrack); + + do { + @fsck_log = $sto->fsck_log_rows; + } while (scalar(@fsck_log) == 0 && sleep(0.1)); + + is(scalar(@fsck_log), 1, "fsck log has one row"); + is($fsck_log[0]->{fid}, $info->{fid}, "fid matches in fsck log"); + is($fsck_log[0]->{evcode}, "REPL", "repl for mismatched checksum logged"); + + do { + @paths = $mogc->get_paths($key); + } while (scalar(@paths) < 2 && sleep(0.1)); + is(scalar(@paths), 2, "2 paths for key after replication"); + is($ua->get($paths[0])->content, "lazy", "paths[0] is correct"); + is($ua->get($paths[1])->content, "lazy", "paths[1] is correct"); + $info = $mogc->file_info($key); + is($info->{checksum}, "MD5:".md5_hex("lazy"), 'checksum unchanged after fsck'); +} + +# fsck notices when all files 
are corrupt +{ + my $key = 'lazycksum'; + my $info = $mogc->file_info($key); + @paths = $mogc->get_paths($key); + is(scalar(@paths), 2, "2 paths for lazycksum"); + + $req = HTTP::Request->new(PUT => $paths[0]); + $req->content("0000"); + $rv = $ua->request($req); + ok($rv->is_success, "upload to corrupt a file successful"); + is($ua->get($paths[0])->content, "0000", "successfully corrupted"); + + $req = HTTP::Request->new(PUT => $paths[1]); + $req->content("1111"); + $rv = $ua->request($req); + ok($rv->is_success, "upload to corrupt a file successful"); + is($ua->get($paths[1])->content, "1111", "successfully corrupted"); + + full_fsck($tmptrack); + + do { + @fsck_log = $sto->fsck_log_rows; + } while (scalar(@fsck_log) == 0 && sleep(0.1)); + + is(scalar(@fsck_log), 1, "fsck log has one row"); + is($fsck_log[0]->{fid}, $info->{fid}, "fid matches in fsck log"); + is($fsck_log[0]->{evcode}, "BSUM", "BSUM logged"); + + @paths = $mogc->get_paths($key); + is(scalar(@paths), 2, "2 paths for checksum"); + @paths = sort( map { $ua->get($_)->content } @paths); + is(join(', ', @paths), "0000, 1111", "corrupted content preserved"); +} + +# reuploaded checksum row clobbers old checksum +{ + my $key = 'lazycksum'; + my $info = $mogc->file_info($key); + + ok($sto->get_checksum($info->{fid}), "old checksum row exists"); + + my $fh = $mogc->new_file($key, "2copies"); + print $fh "HAZY"; + ok(close($fh), "closed replacement file (lazycksum => HAZY)"); + + while ($sto->get_checksum($info->{fid})) { + sleep(0.5); + print "waiting...\n"; + } + is($sto->get_checksum($info->{fid}), undef, "old checksum is gone"); +} + +# completely corrupted files with no checksum row +{ + my $key = 'lazycksum'; + do { + @paths = $mogc->get_paths($key); + } while (scalar(@paths) < 2 && sleep(0.1)); + is(scalar(@paths), 2, "replicated succesfully"); + + my $info = $mogc->file_info($key); + is($info->{checksum}, "MD5:".md5_hex("HAZY"), "checksum created on repl"); + + $req = HTTP::Request->new(PUT => 
$paths[0]); + $req->content("MAYB"); + $rv = $ua->request($req); + ok($rv->is_success, "upload to corrupt a file successful"); + is($ua->get($paths[0])->content, "MAYB", "successfully corrupted (MAYB)"); + + $req = HTTP::Request->new(PUT => $paths[1]); + $req->content("CRZY"); + $rv = $ua->request($req); + ok($rv->is_success, "upload to corrupt a file successful"); + is($ua->get($paths[1])->content, "CRZY", "successfully corrupted (CRZY)"); + + is($sto->delete_checksum($info->{fid}), 1, "nuke new checksum"); + $info = $mogc->file_info($key); + is($info->{checksum}, "MISSING", "checksum is MISSING"); + + full_fsck($tmptrack); + + do { + @fsck_log = $sto->fsck_log_rows; + } while (scalar(@fsck_log) == 0 && sleep(0.1)); + + is(scalar(@fsck_log), 1, "fsck log has one row"); + is($fsck_log[0]->{fid}, $info->{fid}, "fid matches in fsck log"); + is($fsck_log[0]->{evcode}, "BSUM", "BSUM logged"); +} From e8ceb02c0001847f63a2dacdd2cd394b93b7cc59 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 29 Nov 2011 02:22:51 +0000 Subject: [PATCH 189/405] rename checksum{type,name} => hash{type,name} It reads more easily this way, at least to me. 
--- doc/checksums.txt | 2 +- lib/MogileFS/Checksum.pm | 24 ++++++++++++------------ lib/MogileFS/Class.pm | 4 ++-- lib/MogileFS/Store.pm | 32 ++++++++++++++++---------------- lib/MogileFS/Store/Postgres.pm | 10 +++++----- lib/MogileFS/Store/SQLite.pm | 4 ++-- lib/MogileFS/Worker/Fsck.pm | 12 ++++++------ lib/MogileFS/Worker/Query.pm | 20 ++++++++++---------- lib/MogileFS/Worker/Replicate.pm | 8 ++++---- t/01-domain-class.t | 12 ++++++------ t/50-checksum.t | 14 +++++++------- t/checksum.t | 2 +- t/store.t | 4 ++-- 13 files changed, 74 insertions(+), 74 deletions(-) diff --git a/doc/checksums.txt b/doc/checksums.txt index ce59921c..650cf739 100644 --- a/doc/checksums.txt +++ b/doc/checksums.txt @@ -34,7 +34,7 @@ New optional parameters: If "checksumverify" is "1" and "checksum" is present, "create_close" will not return until it has verified the checksum. -If the storage class of a file specifies a valid "checksumtype", +If the storage class of a file specifies a valid "hashtype", the checksum is saved to the "checksum" table in the database. The client is _never_ required to supply a checksum. 
diff --git a/lib/MogileFS/Checksum.pm b/lib/MogileFS/Checksum.pm index da016f28..5f953c72 100644 --- a/lib/MogileFS/Checksum.pm +++ b/lib/MogileFS/Checksum.pm @@ -20,7 +20,7 @@ sub new { my $self = bless { fidid => $row->{fid}, checksum => $row->{checksum}, - checksumtype => $row->{checksumtype} + hashtype => $row->{hashtype} }, $class; return $self; @@ -31,10 +31,10 @@ sub from_string { my ($class, $fidid, $string) = @_; $string =~ /\A([\w-]+):([a-fA-F0-9]{32,128})\z/ or die "invalid checksum string"; - my $checksumname = $1; + my $hashname = $1; my $hexdigest = $2; - my $ref = $TYPE{$checksumname} or - die "invalid checksum name ($checksumname) from $string"; + my $ref = $TYPE{$hashname} or + die "invalid checksum name ($hashname) from $string"; my $checksum = pack("H*", $hexdigest); my $len = length($checksum); $len == $ref->{bytelen} or @@ -43,14 +43,14 @@ sub from_string { bless { fidid => $fidid, checksum => $checksum, - checksumtype => $NAME2TYPE{$checksumname}, + hashtype => $NAME2TYPE{$hashname}, }, $class; } -sub checksumname { +sub hashname { my $self = shift; - my $type = $self->{checksumtype}; - my $name = $TYPE2NAME{$type} or die "checksumtype=$type unknown"; + my $type = $self->{hashtype}; + my $name = $TYPE2NAME{$type} or die "hashtype=$type unknown"; return $name; } @@ -59,7 +59,7 @@ sub save { my $self = shift; my $sto = Mgd::get_store(); - $sto->set_checksum($self->{fidid}, $self->{checksumtype}, $self->{checksum}); + $sto->set_checksum($self->{fidid}, $self->{hashtype}, $self->{checksum}); } sub maybe_save { @@ -68,7 +68,7 @@ sub maybe_save { # $class may be undef as it could've been deleted between # create_open and create_close, we've never verified this before... 
- if ($class && $self->{checksumtype} eq $class->{checksumtype}) { + if ($class && $self->{hashtype} eq $class->{hashtype}) { $self->save; } } @@ -81,7 +81,7 @@ sub hexdigest { sub as_string { my $self = shift; - my $name = $self->checksumname; + my $name = $self->hashname; my $hexdigest = $self->hexdigest; "Checksum[f=$self->{fidid};$name=$hexdigest]" @@ -90,7 +90,7 @@ sub as_string { sub info { my $self = shift; - $self->checksumname . ':' . $self->hexdigest; + $self->hashname . ':' . $self->hexdigest; } 1; diff --git a/lib/MogileFS/Class.pm b/lib/MogileFS/Class.pm index bf9db55b..2d942aee 100644 --- a/lib/MogileFS/Class.pm +++ b/lib/MogileFS/Class.pm @@ -25,8 +25,8 @@ sub id { $_[0]{classid} } sub name { $_[0]{classname} } sub mindevcount { $_[0]{mindevcount} } sub dmid { $_[0]{dmid} } -sub checksumtype { $_[0]{checksumtype} } -sub checksumname { $MogileFS::Checksum::TYPE2NAME{$_[0]{checksumtype}} } +sub hashtype { $_[0]{hashtype} } +sub hashname { $MogileFS::Checksum::TYPE2NAME{$_[0]{hashtype}} } sub repl_policy_string { my $self = shift; diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 9cb74354..17618203 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -19,7 +19,7 @@ use List::Util qw(shuffle); # 13: modifies 'server_settings.value' to TEXT for wider values # also adds a TEXT 'arg' column to file_to_queue for passing arguments # 14: modifies 'device' mb_total, mb_used to INT for devs > 16TB -# 15: adds checksum table, adds 'checksumtype' column to 'class' table +# 15: adds checksum table, adds 'hashtype' column to 'class' table use constant SCHEMA_VERSION => 15; sub new { @@ -533,7 +533,7 @@ sub setup_database { $sto->upgrade_modify_server_settings_value; $sto->upgrade_add_file_to_queue_arg; $sto->upgrade_modify_device_size; - $sto->upgrade_add_class_checksumtype; + $sto->upgrade_add_class_hashtype; return 1; } @@ -601,7 +601,7 @@ sub TABLE_class { classname VARCHAR(50), UNIQUE (dmid,classname), mindevcount TINYINT UNSIGNED NOT 
NULL, - checksumtype TINYINT UNSIGNED + hashtype TINYINT UNSIGNED )" } @@ -810,7 +810,7 @@ sub TABLE_file_to_delete2 { sub TABLE_checksum { "CREATE TABLE checksum ( fid INT UNSIGNED NOT NULL PRIMARY KEY, - checksumtype TINYINT UNSIGNED NOT NULL, + hashtype TINYINT UNSIGNED NOT NULL, checksum VARBINARY(64) NOT NULL )" } @@ -836,10 +836,10 @@ sub upgrade_add_class_replpolicy { } } -sub upgrade_add_class_checksumtype { +sub upgrade_add_class_hashtype { my ($self) = @_; - unless ($self->column_type("class", "checksumtype")) { - $self->dowell("ALTER TABLE class ADD COLUMN checksumtype TINYINT UNSIGNED"); + unless ($self->column_type("class", "hashtype")) { + $self->dowell("ALTER TABLE class ADD COLUMN hashtype TINYINT UNSIGNED"); } } @@ -947,12 +947,12 @@ sub update_class_replpolicy { } # return 1 on success, die otherwise -sub update_class_checksumtype { +sub update_class_hashtype { my $self = shift; - my %arg = $self->_valid_params([qw(dmid classid checksumtype)], @_); + my %arg = $self->_valid_params([qw(dmid classid hashtype)], @_); eval { - $self->dbh->do("UPDATE class SET checksumtype=? WHERE dmid=? AND classid=?", - undef, $arg{checksumtype}, $arg{dmid}, $arg{classid}); + $self->dbh->do("UPDATE class SET hashtype=? WHERE dmid=? AND classid=?", + undef, $arg{hashtype}, $arg{dmid}, $arg{classid}); }; $self->condthrow; } @@ -1331,7 +1331,7 @@ sub get_all_classes { if ($self->cached_schema_version >= 10) { push @cols, 'replpolicy'; if ($self->cached_schema_version >= 15) { - push @cols, 'checksumtype'; + push @cols, 'hashtype'; } } my $cols = join(', ', @cols); @@ -2162,16 +2162,16 @@ sub random_fids_on_device { sub BLOB_BIND_TYPE { undef; } sub set_checksum { - my ($self, $fidid, $checksumtype, $checksum) = @_; + my ($self, $fidid, $hashtype, $checksum) = @_; my $dbh = $self->dbh; die "Your database does not support REPLACE! Reimplement set_checksum!" unless $self->can_replace; eval { my $sth = $dbh->prepare("REPLACE INTO checksum " . 
- "(fid, checksumtype, checksum) " . + "(fid, hashtype, checksum) " . "VALUES (?, ?, ?)"); $sth->bind_param(1, $fidid); - $sth->bind_param(2, $checksumtype); + $sth->bind_param(2, $hashtype); $sth->bind_param(3, $checksum, BLOB_BIND_TYPE); $sth->execute; }; @@ -2181,7 +2181,7 @@ sub set_checksum { sub get_checksum { my ($self, $fidid) = @_; - $self->dbh->selectrow_hashref("SELECT fid, checksumtype, checksum " . + $self->dbh->selectrow_hashref("SELECT fid, hashtype, checksum " . "FROM checksum WHERE fid = ?", undef, $fidid); } diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index 244c12a3..f0aad0e8 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -836,16 +836,16 @@ sub release_lock { sub BLOB_BIND_TYPE { { pg_type => PG_BYTEA } } sub set_checksum { - my ($self, $fidid, $checksumtype, $checksum) = @_; + my ($self, $fidid, $hashtype, $checksum) = @_; my $dbh = $self->dbh; $dbh->begin_work; eval { my $sth = $dbh->prepare("INSERT INTO checksum " . - "(fid, checksumtype, checksum) ". + "(fid, hashtype, checksum) ". "VALUES (?, ?, ?)"); $sth->bind_param(1, $fidid); - $sth->bind_param(2, $checksumtype); + $sth->bind_param(2, $hashtype); $sth->bind_param(3, $checksum, BLOB_BIND_TYPE); $sth->execute; }; @@ -853,9 +853,9 @@ sub set_checksum { if ($self->was_duplicate_error) { eval { my $sth = $dbh->prepare("UPDATE checksum " . - "SET checksumtype = ?, checksum = ? " . + "SET hashtype = ?, checksum = ? " . 
"WHERE fid = ?"); - $sth->bind_param(1, $checksumtype); + $sth->bind_param(1, $hashtype); $sth->bind_param(2, $checksum, BLOB_BIND_TYPE); $sth->bind_param(3, $fidid); $sth->execute; diff --git a/lib/MogileFS/Store/SQLite.pm b/lib/MogileFS/Store/SQLite.pm index 3cf6ee5e..5d8dce6e 100644 --- a/lib/MogileFS/Store/SQLite.pm +++ b/lib/MogileFS/Store/SQLite.pm @@ -111,7 +111,7 @@ sub TABLE_class { classid TINYINT UNSIGNED NOT NULL, classname VARCHAR(50), mindevcount TINYINT UNSIGNED NOT NULL, - checksumtype TINYINT UNSIGNED, + hashtype TINYINT UNSIGNED, UNIQUE (dmid,classid), UNIQUE (dmid,classname) )" @@ -243,7 +243,7 @@ sub upgrade_add_device_drain { sub upgrade_modify_server_settings_value { 1 } sub upgrade_add_file_to_queue_arg { 1 } sub upgrade_modify_device_size { 1 } -sub upgrade_add_class_checksumtype { 1 } +sub upgrade_add_class_hashtype { 1 } # inefficient, but no warning and no locking sub should_begin_replicating_fidid { diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index e9eacf42..e9fed997 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -139,7 +139,7 @@ sub check_fid { } # missing checksum row - if ($fid->class->checksumtype && ! $fid->checksum) { + if ($fid->class->hashtype && ! $fid->checksum) { return $fix->(); } @@ -172,7 +172,7 @@ sub check_fid { }); if ($rv) { - return $fid->class->checksumtype ? $fix->() : HANDLED; + return $fid->class->hashtype ? $fix->() : HANDLED; } elsif ($err eq "stalled") { return STALLED; } elsif ($err eq "needfix") { @@ -215,7 +215,7 @@ sub fix_fid { my @good_devs; my @bad_devs; my %already_checked; # devid -> 1. 
- my $alg = $fid->class->checksumname; + my $alg = $fid->class->hashname; my $checksums = {}; my $ping_cb = sub { $self->still_alive }; @@ -358,7 +358,7 @@ sub checksum_on_disk { sub all_checksums_bad { my ($self, $fid, $checksums) = @_; - my $alg = $fid->class->checksumname or return; # class could've changed + my $alg = $fid->class->hashname or return; # class could've changed my $cur_checksum = $fid->checksum; my @err; @@ -386,11 +386,11 @@ sub fix_checksums { $fid->fsck_log(EV_BAD_CHECKSUM); } } else { # fresh row to checksum - my $checksumtype = $fid->class->checksumtype or return; + my $hashtype = $fid->class->hashtype or return; my %row = ( fid => $fid->id, checksum => $disk_checksum, - checksumtype => $checksumtype, + hashtype => $hashtype, ); my $new_checksum = MogileFS::Checksum->new(\%row); debug("creating new checksum=$new_checksum"); diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 40cb4e51..1c3d5e40 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -419,7 +419,7 @@ sub cmd_create_close { # However, we /always/ verify it if the client wants us to, even # if the class does not enforce or store it. if ($checksum && $args->{checksumverify}) { - my $alg = $checksum->checksumname; + my $alg = $checksum->hashname; my $actual = $httpfile->digest($alg); # expensive! 
if ($actual ne $checksum->{checksum}) { $failed->(); @@ -625,7 +625,7 @@ sub cmd_file_info { $ret->{domain} = Mgd::domain_factory()->get_by_id($fid->dmid)->name; my $class = Mgd::class_factory()->get_by_id($fid->dmid, $fid->classid); $ret->{class} = $class->name; - if ($class->{checksumtype}) { + if ($class->{hashtype}) { my $checksum = Mgd::get_store()->get_checksum($fid->id); if ($checksum) { $checksum = MogileFS::Checksum->new($checksum); @@ -872,11 +872,11 @@ sub cmd_create_class { return $self->err_line('invalid_replpolicy', $@) if $@; } - my $checksumtype = $args->{checksumtype}; - if ($checksumtype && $checksumtype ne 'NONE') { - my $tmp = $MogileFS::Checksum::NAME2TYPE{$checksumtype}; - return $self->err_line('invalid_checksumtype') unless $tmp; - $checksumtype = $tmp; + my $hashtype = $args->{hashtype}; + if ($hashtype && $hashtype ne 'NONE') { + my $tmp = $MogileFS::Checksum::NAME2TYPE{$hashtype}; + return $self->err_line('invalid_hashtype') unless $tmp; + $hashtype = $tmp; } my $sto = Mgd::get_store(); @@ -905,9 +905,9 @@ sub cmd_create_class { # don't erase an existing replpolicy if we're not setting a new one. $sto->update_class_replpolicy(dmid => $dmid, classid => $clsid, replpolicy => $replpolicy) if $replpolicy; - if ($checksumtype) { - $sto->update_class_checksumtype(dmid => $dmid, classid => $clsid, - checksumtype => $checksumtype eq 'NONE' ? undef : $checksumtype); + if ($hashtype) { + $sto->update_class_hashtype(dmid => $dmid, classid => $clsid, + hashtype => $hashtype eq 'NONE' ? 
undef : $hashtype); } # return success diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index e5148238..3e150bda 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -475,7 +475,7 @@ sub replicate { } my $worker = MogileFS::ProcManager->is_child or die; - my $digest = Digest->new($cls->checksumname) if $cls->checksumtype; + my $digest = Digest->new($cls->hashname) if $cls->hashtype; my $rv = http_copy( sdevid => $sdevid, ddevid => $ddevid, @@ -506,7 +506,7 @@ sub replicate { my $dfid = MogileFS::DevFID->new($ddevid, $fid); $dfid->add_to_db; if ($digest && !$fid->checksum) { - $sto->set_checksum($fidid, $cls->checksumtype, $digest->digest); + $sto->set_checksum($fidid, $cls->hashtype, $digest->digest); } push @on_devs, $devs->{$ddevid}; @@ -545,7 +545,7 @@ sub http_copy { my $content_md5 = ''; my $fid_checksum = $rfid->checksum; - if ($fid_checksum && $fid_checksum->checksumname eq "MD5") { + if ($fid_checksum && $fid_checksum->hashname eq "MD5") { # some HTTP servers may be able to verify Content-MD5 on PUT # and reject corrupted requests. no HTTP server should reject # a request for an unrecognized header @@ -704,7 +704,7 @@ sub http_copy { if ($line =~ m!^HTTP/\d+\.\d+\s+(\d+)!) 
{ if ($1 >= 200 && $1 <= 299) { if ($digest) { - my $alg = $rfid->class->checksumname; + my $alg = $rfid->class->hashname; if ($ddev->{reject_bad_md5} && ($alg eq "MD5")) { # dest device would've rejected us with a error, diff --git a/t/01-domain-class.t b/t/01-domain-class.t index ccf8f516..8ea0ccd1 100644 --- a/t/01-domain-class.t +++ b/t/01-domain-class.t @@ -128,10 +128,10 @@ ok($domfac != $classfac, "factories are not the same singleton"); replpolicy => 'MultipleHosts(6)'), 'can set replpolicy'); ok($sto->update_class_name(dmid => $domid, classid => $clsid2, classname => 'boo'), 'can rename class'); - ok($sto->update_class_checksumtype(dmid => $domid, classid => $clsid2, - checksumtype => 1), 'can set checksum type'); - ok($sto->update_class_checksumtype(dmid => $domid, classid => $clsid2, - checksumtype => undef), 'can unset checksum type'); + ok($sto->update_class_hashtype(dmid => $domid, classid => $clsid2, + hashtype => 1), 'can set checksum type'); + ok($sto->update_class_hashtype(dmid => $domid, classid => $clsid2, + hashtype => undef), 'can unset checksum type'); } { @@ -146,7 +146,7 @@ ok($domfac != $classfac, "factories are not the same singleton"); 'classid' => '1', 'mindevcount' => '2', 'classname' => 'bar', - 'checksumtype' => undef, + 'hashtype' => undef, }, 'class bar came back'); # We edited class2 a bunch, make sure that all stuck. 
is_deeply($classes[1], { @@ -155,6 +155,6 @@ ok($domfac != $classfac, "factories are not the same singleton"); 'classid' => '2', 'mindevcount' => '3', 'classname' => 'boo', - 'checksumtype' => undef, + 'hashtype' => undef, }, 'class baz came back as boo'); } diff --git a/t/50-checksum.t b/t/50-checksum.t index 3216046b..f5627fb6 100644 --- a/t/50-checksum.t +++ b/t/50-checksum.t @@ -136,7 +136,7 @@ use Digest::MD5 qw/md5_hex/; # enable saving MD5 checksums in "2copies" class { %opts = ( domain => "testdom", class => "2copies", - checksumtype => "MD5", mindevcount => 2 ); + hashtype => "MD5", mindevcount => 2 ); ok($be->do_request("update_class", \%opts), "update class"); wait_for_monitor($be); } @@ -173,20 +173,20 @@ use Digest::MD5 qw/md5_hex/; my @classes; %opts = ( domain => "testdom", class => "1copy", mindevcount => 1 ); - $opts{checksumtype} = "NONE"; + $opts{hashtype} = "NONE"; ok($be->do_request("update_class", \%opts), "update class"); @classes = grep { $_->{classname} eq '1copy' } $sto->get_all_classes; - is($classes[0]->{checksumtype}, undef, "checksumtype unset"); + is($classes[0]->{hashtype}, undef, "hashtype unset"); - $opts{checksumtype} = "MD5"; + $opts{hashtype} = "MD5"; ok($be->do_request("update_class", \%opts), "update class"); @classes = grep { $_->{classname} eq '1copy' } $sto->get_all_classes; - is($classes[0]->{checksumtype}, 1, "checksumtype is 1 (MD5)"); + is($classes[0]->{hashtype}, 1, "hashtype is 1 (MD5)"); - $opts{checksumtype} = "NONE"; + $opts{hashtype} = "NONE"; ok($be->do_request("update_class", \%opts), "update class"); @classes = grep { $_->{classname} eq '1copy' } $sto->get_all_classes; - is($classes[0]->{checksumtype}, undef, "checksumtype unset"); + is($classes[0]->{hashtype}, undef, "hashtype unset"); } # wait for replicate to verify existing (valid) checksum diff --git a/t/checksum.t b/t/checksum.t index 634b4985..3c1c5a4c 100644 --- a/t/checksum.t +++ b/t/checksum.t @@ -22,7 +22,7 @@ $sto->set_checksum(6, 1, md5("asdf")); 
my $hash = $sto->get_checksum(6); my $csum = MogileFS::Checksum->new($hash); is(md5_hex("asdf"), $csum->hexdigest); -is("MD5", $csum->checksumname); +is("MD5", $csum->hashname); my $zero = "MD5:d41d8cd98f00b204e9800998ecf8427e"; $csum = MogileFS::Checksum->from_string(6, $zero); diff --git a/t/store.t b/t/store.t index f2ece189..1a39ffd3 100644 --- a/t/store.t +++ b/t/store.t @@ -182,13 +182,13 @@ $sto->set_checksum(6, 1, md5("FOO")); my $hash = $sto->get_checksum(6); ok($hash->{checksum} eq md5("FOO"), "checksum matches expected"); ok($hash->{fid} == 6, "checksum fid set correctly"); -ok($hash->{checksumtype} == 1, "checksumtype set correctly"); +ok($hash->{hashtype} == 1, "hashtype set correctly"); $sto->set_checksum(6, 2, md5("MOO")); $hash = $sto->get_checksum(6); ok($hash->{checksum} eq md5("MOO"), "checksum matches expected"); ok($hash->{fid} == 6, "checksum fid set correctly"); -ok($hash->{checksumtype} == 2, "checksumtype set correctly"); +ok($hash->{hashtype} == 2, "hashtype set correctly"); ok(1 == $sto->delete_checksum(6), "checksum deleted OK"); ok(0 == $sto->delete_checksum(6), "checksum delete MISS"); From 739e2753b5a88f85d0b723a4f5010410e11e4add Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 29 Nov 2011 02:24:36 +0000 Subject: [PATCH 190/405] Newer SQLite _can_ ALTER TABLE .. ADD COLUMN in some cases I'll be testing checksum functionality on my home installation before testing it on other installations, and I run SQLite at home. 
ref: http://www.sqlite.org/lang_altertable.html --- lib/MogileFS/Store/SQLite.pm | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/MogileFS/Store/SQLite.pm b/lib/MogileFS/Store/SQLite.pm index 5d8dce6e..d1bd6f89 100644 --- a/lib/MogileFS/Store/SQLite.pm +++ b/lib/MogileFS/Store/SQLite.pm @@ -243,7 +243,6 @@ sub upgrade_add_device_drain { sub upgrade_modify_server_settings_value { 1 } sub upgrade_add_file_to_queue_arg { 1 } sub upgrade_modify_device_size { 1 } -sub upgrade_add_class_hashtype { 1 } # inefficient, but no warning and no locking sub should_begin_replicating_fidid { From 9fa545300f9a9f0f0337c125cc7614c81566e3b2 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 29 Nov 2011 08:21:41 +0000 Subject: [PATCH 191/405] always use HTTPFile->digest with the ping callback We need to ensure the worker stays alive during MD5 generation, especially on large files that can take many seconds to verify. --- lib/MogileFS/Worker/Query.pm | 2 +- lib/MogileFS/Worker/Replicate.pm | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 1c3d5e40..b8e98a97 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -420,7 +420,7 @@ sub cmd_create_close { # if the class does not enforce or store it. if ($checksum && $args->{checksumverify}) { my $alg = $checksum->hashname; - my $actual = $httpfile->digest($alg); # expensive! + my $actual = $httpfile->digest($alg, sub { $self->still_alive }); if ($actual ne $checksum->{checksum}) { $failed->(); $actual = "$alg:" . 
unpack("H*", $actual); diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index 3e150bda..98981e90 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -712,7 +712,8 @@ sub http_copy { return 1; } my $durl = "http://$dhostip:$dport$dpath"; - my $actual = MogileFS::HTTPFile->at($durl)->digest($alg); + my $httpfile = MogileFS::HTTPFile->at($durl); + my $actual = $httpfile->digest($alg, $intercopy_cb); if ($actual ne $digest) { my $expect = unpack("H*", $digest); $actual = unpack("H*", $actual); From 51bf68060c5b285f3d04796e71422cb8e6417617 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 29 Nov 2011 22:13:24 +0000 Subject: [PATCH 192/405] get_domains returns hashtype as a string This special-cases "NONE" for no hash for our users. --- lib/MogileFS/Class.pm | 5 +++++ lib/MogileFS/Worker/Query.pm | 1 + 2 files changed, 6 insertions(+) diff --git a/lib/MogileFS/Class.pm b/lib/MogileFS/Class.pm index 2d942aee..ad5a9acb 100644 --- a/lib/MogileFS/Class.pm +++ b/lib/MogileFS/Class.pm @@ -28,6 +28,11 @@ sub dmid { $_[0]{dmid} } sub hashtype { $_[0]{hashtype} } sub hashname { $MogileFS::Checksum::TYPE2NAME{$_[0]{hashtype}} } +sub hashtype_string { + my $self = shift; + $self->hashtype ? $self->hashname : "NONE"; +} + sub repl_policy_string { my $self = shift; return $self->{replpolicy} ? 
$self->{replpolicy} diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index b8e98a97..44463825 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -1033,6 +1033,7 @@ sub cmd_get_domains { $ret->{"domain${dm_n}class${cl_n}mindevcount"} = $cl->mindevcount; $ret->{"domain${dm_n}class${cl_n}replpolicy"} = $cl->repl_policy_string; + $ret->{"domain${dm_n}class${cl_n}hashtype"} = $cl->hashtype_string; } $ret->{"domain${dm_n}classes"} = $cl_n; } From d9527ab7380b7d7383529301a84ea582b6198adf Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 2 Dec 2011 15:54:43 -0800 Subject: [PATCH 193/405] doc/checksums: clarify binary column type for various DBs We don't actually use the BLOB type anywhere, as checksums are definitely not "L"(arge) objects. --- doc/checksums.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/checksums.txt b/doc/checksums.txt index 650cf739..c3a969eb 100644 --- a/doc/checksums.txt +++ b/doc/checksums.txt @@ -1,8 +1,9 @@ database -------- -checksum hashes are represented as binary (BLOB) columns in the -database. They can be up to 64 bytes large for SHA512 or WHIRLPOOL. +checksum hashes are represented as binary (VARBINARY for MySQL/SQLite, +bytea for Postgres) columns in the database. They can be up to 64 bytes +large for SHA512 or WHIRLPOOL. tracker protocol ---------------- From dce4bb5c1f6d2dcd9762c7a63c8be297117ca7f7 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 18 Jan 2012 12:59:50 -0800 Subject: [PATCH 194/405] httpfile: fix timeout comparison when digesting via mogstored The timeout comparison is wrong and causing ping_cb to never fire. This went unnoticed since I have reasonably fast disks on my storage nodes and the <$sock> operation was able to complete before being hit by a watchdog timeout. 
--- lib/MogileFS/HTTPFile.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index 6da498af..487d59f7 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -178,7 +178,7 @@ retry: $expiry = Time::HiRes::time() + $response_timeout; while (!wait_for_readability(fileno($sock), 1.0) && - (Time::HiRes::time() > $expiry)) { + (Time::HiRes::time() < $expiry)) { $ping_cb->(); } From a26811cf6d035d63833e8d88b9d9e5d5ea32aed0 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 9 Mar 2012 18:58:15 -0800 Subject: [PATCH 195/405] fsck: add fsck_auto_checksum server setting Enabling this setting allows fsck to checksum all replicas on all devices and report any corrupted copies regardless of per-class settings. This feature is useful for determining if enabling checksums on certain classes is necessary and will also benefit users who cannot afford to store checksums in the database. --- lib/MogileFS/Checksum.pm | 6 ++++ lib/MogileFS/Config.pm | 10 ++++++ lib/MogileFS/Worker/Fsck.pm | 67 ++++++++++++++++++++++++++++--------- t/50-checksum.t | 36 +++++++++++++++++++- 4 files changed, 102 insertions(+), 17 deletions(-) diff --git a/lib/MogileFS/Checksum.pm b/lib/MogileFS/Checksum.pm index 5f953c72..ba4a296b 100644 --- a/lib/MogileFS/Checksum.pm +++ b/lib/MogileFS/Checksum.pm @@ -15,6 +15,12 @@ my %TYPE = ( our %NAME2TYPE = map { $_ => $TYPE{$_}->{type} } keys(%TYPE); our %TYPE2NAME = map { $NAME2TYPE{$_} => $_ } keys(%NAME2TYPE); +sub valid_alg { + my ($class, $alg) = @_; + + defined($alg) && defined($TYPE{$alg}); +} + sub new { my ($class, $row) = @_; my $self = bless { diff --git a/lib/MogileFS/Config.pm b/lib/MogileFS/Config.pm index f5c67faa..3a198b95 100644 --- a/lib/MogileFS/Config.pm +++ b/lib/MogileFS/Config.pm @@ -317,6 +317,7 @@ sub hostname { sub server_setting_is_readable { my ($class, $key) = @_; + return 1 if $key eq 'fsck_auto_checksum'; return 0 if $key =~ /^fsck_/; return 1; } @@ 
-387,6 +388,15 @@ sub server_setting_is_writable { # should probably restrict to (\d+) if ($key =~ /^queue_/) { return $any }; + if ($key eq "fsck_auto_checksum") { + return sub { + my $v = shift; + return "0" if $v =~ /^(0|f|off|n)/i; + return $v if MogileFS::Checksum->valid_alg($v); + die "Not a valid checksum algorithm"; + } + } + return 0; } diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index e9fed997..ce9f72e9 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -4,6 +4,7 @@ use strict; use base 'MogileFS::Worker'; use fields ( 'opt_nostat', # bool: do we trust mogstoreds? skipping size stats? + 'opt_auto_checksum', # (off|MD5) checksum regardless of per-class hashtype to look for mismatches ); use MogileFS::Util qw(every error debug); use MogileFS::Config; @@ -27,6 +28,7 @@ use constant EV_RE_REPLICATE => "REPL"; use constant EV_BAD_COUNT => "BCNT"; use constant EV_BAD_CHECKSUM => "BSUM"; use constant EV_NO_CHECKSUM => "NSUM"; +use constant EV_MULTI_CHECKSUM => "MSUM"; use POSIX (); @@ -66,6 +68,8 @@ sub work { return unless @fids; $self->{opt_nostat} = MogileFS::Config->server_setting('fsck_opt_policy_only') || 0; + my $alg = MogileFS::Config->server_setting_cached("fsck_auto_checksum"); + $self->{opt_auto_checksum} = MogileFS::Checksum->valid_alg($alg) ? $alg : 0; MogileFS::FID->mass_load_devids(@fids); # don't sleep in loop, next round, since we found stuff to work on @@ -143,6 +147,10 @@ sub check_fid { return $fix->(); } + if ($self->{opt_auto_checksum}) { + return $fix->(); + } + # in the fast case, do nothing else (don't check if assumed file # locations are actually there). in the fast case, all we do is # check the replication policy, which is already done, so finish. @@ -215,7 +223,7 @@ sub fix_fid { my @good_devs; my @bad_devs; my %already_checked; # devid -> 1. 
- my $alg = $fid->class->hashname; + my $alg = $fid->class->hashname || $self->{opt_auto_checksum}; my $checksums = {}; my $ping_cb = sub { $self->still_alive }; @@ -356,10 +364,8 @@ sub checksum_on_disk { return $dfid->checksum_on_disk($alg, $ping_cb); } -sub all_checksums_bad { - my ($self, $fid, $checksums) = @_; - my $alg = $fid->class->hashname or return; # class could've changed - my $cur_checksum = $fid->checksum; +sub bad_checksums_errmsg { + my ($self, $alg, $checksums) = @_; my @err; foreach my $checksum (keys %$checksums) { @@ -367,7 +373,26 @@ sub all_checksums_bad { $checksum = unpack("H*", $checksum); push @err, "$alg:$checksum on devids=[$bdevs]" } - my $err = join('; ', @err); + + return join('; ', @err); +} + +# we don't now what checksum the file is supposed to be, but some +# of the devices had checksums that didn't match the other(s). +sub auto_checksums_bad { + my ($self, $fid, $checksums) = @_; + my $alg = $self->{opt_auto_checksum}; + my $err = $self->bad_checksums_errmsg($alg, $checksums); + + error("$fid has multiple checksums: $err"); + $fid->fsck_log(EV_MULTI_CHECKSUM); +} + +sub all_checksums_bad { + my ($self, $fid, $checksums) = @_; + my $alg = $fid->class->hashname or return; # class could've changed + my $cur_checksum = $fid->checksum; + my $err = $self->bad_checksums_errmsg($alg, $checksums); my $cur = $cur_checksum ? "Expected: $cur_checksum" : "No known valid checksum"; error("all checksums bad: $err. 
$cur"); @@ -386,16 +411,24 @@ sub fix_checksums { $fid->fsck_log(EV_BAD_CHECKSUM); } } else { # fresh row to checksum - my $hashtype = $fid->class->hashtype or return; - my %row = ( - fid => $fid->id, - checksum => $disk_checksum, - hashtype => $hashtype, - ); - my $new_checksum = MogileFS::Checksum->new(\%row); - debug("creating new checksum=$new_checksum"); - $fid->fsck_log(EV_NO_CHECKSUM); - $new_checksum->save; + my $hashtype = $fid->class->hashtype; + + # we store this in the database + if ($hashtype) { + my %row = ( + fid => $fid->id, + checksum => $disk_checksum, + hashtype => $hashtype, + ); + my $new_checksum = MogileFS::Checksum->new(\%row); + debug("creating new checksum=$new_checksum"); + $fid->fsck_log(EV_NO_CHECKSUM); + $new_checksum->save; + } else { + my $hex_checksum = unpack("H*", $disk_checksum); + my $alg = $self->{opt_auto_checksum}; + debug("fsck_auto_checksum good: $fid $alg:$hex_checksum"); + } } } elsif ($cur_checksum) { my $good = delete($checksums->{$cur_checksum->{checksum}}); @@ -405,6 +438,8 @@ sub fix_checksums { } else { $self->all_checksums_bad($fid, $checksums); } + } elsif ($self->{opt_auto_checksum}) { + $self->auto_checksums_bad($fid, $checksums); } else { $self->all_checksums_bad($fid, $checksums); } diff --git a/t/50-checksum.t b/t/50-checksum.t index f5627fb6..ed69c80e 100644 --- a/t/50-checksum.t +++ b/t/50-checksum.t @@ -11,7 +11,7 @@ find_mogclient_or_skip(); my $sto = eval { temp_store(); }; if ($sto) { - plan tests => 99; + plan tests => 117; } else { plan skip_all => "Can't create temporary test database: $@"; exit 0; @@ -74,6 +74,7 @@ sub wait_for_monitor { sub full_fsck { my $tmptrack = shift; + ok($tmptrack->mogadm("fsck", "stop"), "stop fsck"); ok($tmptrack->mogadm("fsck", "clearlog"), "clear fsck log"); ok($tmptrack->mogadm("fsck", "reset"), "reset fsck"); ok($tmptrack->mogadm("fsck", "start"), "started fsck"); @@ -367,3 +368,36 @@ use Digest::MD5 qw/md5_hex/; is($fsck_log[0]->{fid}, $info->{fid}, "fid matches in 
fsck log"); is($fsck_log[0]->{evcode}, "BSUM", "BSUM logged"); } + +# disable MD5 checksums in "2copies" class +{ + %opts = ( domain => "testdom", class => "2copies", + hashtype => "NONE", mindevcount => 2 ); + ok($be->do_request("update_class", \%opts), "update class"); + wait_for_monitor($be); +} + +# use fsck_auto_checksum instead of per-class checksums +{ + my $key = 'lazycksum'; + my $info = $mogc->file_info($key); + $sto->delete_checksum($info->{fid}); + + ok($tmptrack->mogadm("settings", "set", "fsck_auto_checksum", "MD5"), "enable fsck_auto_checksum=MD5"); + wait_for_monitor($be); + full_fsck($tmptrack); + do { + @fsck_log = $sto->fsck_log_rows; + } while (scalar(@fsck_log) == 0 && sleep(0.1)); + is(scalar(@fsck_log), 1, "fsck log has one row"); + is($fsck_log[0]->{fid}, $info->{fid}, "fid matches in fsck log"); + is($fsck_log[0]->{evcode}, "MSUM", "MSUM logged"); +} + +# ensure server setting is visible +use MogileFS::Admin; +{ + my $moga = MogileFS::Admin->new(hosts => [ "127.0.0.1:7001" ]); + my $settings = $moga->server_settings; + is($settings->{fsck_auto_checksum}, 'MD5', "fsck_auto_checksum server setting visible"); +} From 37ad4cc6852cc9026588fda162edda5e935de275 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 9 Mar 2012 18:58:16 -0800 Subject: [PATCH 196/405] checksums: disable all hash algorithms except MD5 MD5 is faster than SHA1, and much faster than any of the SHA2 variants. Given the time penalty of fsck is already high with MD5, prevent folks from shooting themselves in the foot with extremely expensive hash algorithms. 
--- lib/MogileFS/Checksum.pm | 46 ++++++++++++++++++++++++++---- lib/Mogstored/SideChannelClient.pm | 4 ++- 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/lib/MogileFS/Checksum.pm b/lib/MogileFS/Checksum.pm index ba4a296b..29a614c7 100644 --- a/lib/MogileFS/Checksum.pm +++ b/lib/MogileFS/Checksum.pm @@ -5,11 +5,13 @@ use overload '""' => \&as_string; my %TYPE = ( "MD5" => { type => 1, bytelen => 128 / 8 }, - "SHA-1" => { type => 2, bytelen => 160 / 8 }, - "SHA-224" => { type => 3, bytelen => 224 / 8 }, - "SHA-256" => { type => 4, bytelen => 256 / 8 }, - "SHA-384" => { type => 5, bytelen => 384 / 8 }, - "SHA-512" => { type => 6, bytelen => 512 / 8 }, + +# see POD for rationale below +# "SHA-1" => { type => 2, bytelen => 160 / 8 }, +# "SHA-224" => { type => 3, bytelen => 224 / 8 }, +# "SHA-256" => { type => 4, bytelen => 256 / 8 }, +# "SHA-384" => { type => 5, bytelen => 384 / 8 }, +# "SHA-512" => { type => 6, bytelen => 512 / 8 }, ); our %NAME2TYPE = map { $_ => $TYPE{$_}->{type} } keys(%TYPE); @@ -100,3 +102,37 @@ sub info { } 1; + +__END__ + +=head1 NAME + +MogileFS::Checksum - Checksums handling for MogileFS + +=head1 ABOUT + +MogileFS supports optional MD5 checksums. Checksums can be stored +in the database on a per-class basis or they can be enabled globally +during fsck-only. + +Enabling checksums will greatly increase the time and I/O required for +fsck as all files on all devices to be checksummed need to be reread. + +Fsck and replication will use significantly more network bandwidth if +the mogstored is not used or if mogstored_stream_port is +not configured correctly. Using Perlbal to 1.80 or later for HTTP +will also speed up checksum verification during replication using +the Content-MD5 HTTP header. + +=head1 ALGORITHMS + +While we can easily enable some or all of the SHA family of hash +algorithms, they all perform worse than MD5. 
Checksums are intended to +detect unintentional corruption due to hardware/software errors, not +malicious data replacement. Since MogileFS does not implement any +security/authorization on its own to protect against malicious use, the +use of checksums in MogileFS to protect against malicious data +replacement is misguided. + +If you have a use case which requires a stronger hash algorithm, +please speak up on the mailing list at L. diff --git a/lib/Mogstored/SideChannelClient.pm b/lib/Mogstored/SideChannelClient.pm index bf235cba..4f71a93b 100644 --- a/lib/Mogstored/SideChannelClient.pm +++ b/lib/Mogstored/SideChannelClient.pm @@ -68,7 +68,9 @@ sub read_buf_consume { } $self->watch_read(0); Mogstored->iostat_subscribe($self); - } elsif ($cmd =~ /^(MD5|SHA-(?:1|224|256|384|512)) (\S+)$/) { + } elsif ($cmd =~ /^(MD5) (\S+)$/) { + # we can easily enable other hash algorithms with the above + # regexp, but we won't for now (see MogileFS::Checksum) my $alg = $1; my $uri = $self->validate_uri($2); return unless defined($uri); From 35dcf3e768338ebf5ef4420661ce5a1965b0fbda Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 11 Mar 2012 03:24:32 +0000 Subject: [PATCH 197/405] fsck_checksum setting replaces fsck_auto_checksum Unlike the setting it replaces, this new setting can be used to disable checksumming entirely, regardless of per-class options. 
fsck_checksum=(class|off|MD5) class - is the default, fsck based on per-class hashtype off - skip all checksumming regardless of per-class setting MD5 - same as the previous fsck_auto_checksum=MD5 --- lib/MogileFS/Config.pm | 7 ++--- lib/MogileFS/Worker/Fsck.pm | 32 ++++++++++++---------- t/50-checksum.t | 53 +++++++++++++++++++++++++++++++++---- 3 files changed, 70 insertions(+), 22 deletions(-) diff --git a/lib/MogileFS/Config.pm b/lib/MogileFS/Config.pm index 3a198b95..578f7716 100644 --- a/lib/MogileFS/Config.pm +++ b/lib/MogileFS/Config.pm @@ -317,7 +317,7 @@ sub hostname { sub server_setting_is_readable { my ($class, $key) = @_; - return 1 if $key eq 'fsck_auto_checksum'; + return 1 if $key eq 'fsck_checksum'; return 0 if $key =~ /^fsck_/; return 1; } @@ -388,10 +388,11 @@ sub server_setting_is_writable { # should probably restrict to (\d+) if ($key =~ /^queue_/) { return $any }; - if ($key eq "fsck_auto_checksum") { + if ($key eq "fsck_checksum") { return sub { my $v = shift; - return "0" if $v =~ /^(0|f|off|n)/i; + return "off" if $v eq "off"; + return undef if $v eq "class"; return $v if MogileFS::Checksum->valid_alg($v); die "Not a valid checksum algorithm"; } diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index ce9f72e9..92071191 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -4,7 +4,7 @@ use strict; use base 'MogileFS::Worker'; use fields ( 'opt_nostat', # bool: do we trust mogstoreds? skipping size stats? - 'opt_auto_checksum', # (off|MD5) checksum regardless of per-class hashtype to look for mismatches + 'opt_checksum', # (class|off|MD5) checksum mode ); use MogileFS::Util qw(every error debug); use MogileFS::Config; @@ -68,8 +68,12 @@ sub work { return unless @fids; $self->{opt_nostat} = MogileFS::Config->server_setting('fsck_opt_policy_only') || 0; - my $alg = MogileFS::Config->server_setting_cached("fsck_auto_checksum"); - $self->{opt_auto_checksum} = MogileFS::Checksum->valid_alg($alg) ? 
$alg : 0; + my $alg = MogileFS::Config->server_setting_cached("fsck_checksum"); + if (defined($alg) && $alg eq "off") { + $self->{opt_checksum} = "off"; + } else { + $self->{opt_checksum} = MogileFS::Checksum->valid_alg($alg) ? $alg : 0; + } MogileFS::FID->mass_load_devids(@fids); # don't sleep in loop, next round, since we found stuff to work on @@ -147,15 +151,15 @@ sub check_fid { return $fix->(); } - if ($self->{opt_auto_checksum}) { - return $fix->(); - } - # in the fast case, do nothing else (don't check if assumed file # locations are actually there). in the fast case, all we do is # check the replication policy, which is already done, so finish. return HANDLED if $self->{opt_nostat}; + if ($self->{opt_checksum}) { + return $fix->(); + } + # stat each device to see if it's still there. on first problem, # stop and go into the slow(er) fix function. my $err; @@ -223,7 +227,7 @@ sub fix_fid { my @good_devs; my @bad_devs; my %already_checked; # devid -> 1. - my $alg = $fid->class->hashname || $self->{opt_auto_checksum}; + my $alg = $fid->class->hashname || $self->{opt_checksum}; my $checksums = {}; my $ping_cb = sub { $self->still_alive }; @@ -245,7 +249,7 @@ sub fix_fid { die "dev " . $dev->id . " unreachable" unless defined $disk_size; if ($disk_size == $fid->length) { - if ($alg) { + if ($alg && $alg ne "off") { my $digest = $self->checksum_on_disk($dfid, $alg, $ping_cb); unless (defined $digest) { die "dev " . $dev->id . " unreachable"; @@ -317,7 +321,7 @@ sub fix_fid { # in case the devcount or similar was fixed. $fid->want_reload; - $self->fix_checksums($fid, $checksums) if $alg; + $self->fix_checksums($fid, $checksums) if $alg && $alg ne "off"; # Note: this will reload devids, if they called 'note_on_device' # or 'forget_about_device' @@ -381,7 +385,7 @@ sub bad_checksums_errmsg { # of the devices had checksums that didn't match the other(s). 
sub auto_checksums_bad { my ($self, $fid, $checksums) = @_; - my $alg = $self->{opt_auto_checksum}; + my $alg = $self->{opt_checksum}; my $err = $self->bad_checksums_errmsg($alg, $checksums); error("$fid has multiple checksums: $err"); @@ -426,8 +430,8 @@ sub fix_checksums { $new_checksum->save; } else { my $hex_checksum = unpack("H*", $disk_checksum); - my $alg = $self->{opt_auto_checksum}; - debug("fsck_auto_checksum good: $fid $alg:$hex_checksum"); + my $alg = $self->{opt_checksum}; + debug("fsck_checksum=auto good: $fid $alg:$hex_checksum"); } } } elsif ($cur_checksum) { @@ -438,7 +442,7 @@ sub fix_checksums { } else { $self->all_checksums_bad($fid, $checksums); } - } elsif ($self->{opt_auto_checksum}) { + } elsif ($self->{opt_checksum}) { $self->auto_checksums_bad($fid, $checksums); } else { $self->all_checksums_bad($fid, $checksums); diff --git a/t/50-checksum.t b/t/50-checksum.t index ed69c80e..a1113f0c 100644 --- a/t/50-checksum.t +++ b/t/50-checksum.t @@ -11,7 +11,7 @@ find_mogclient_or_skip(); my $sto = eval { temp_store(); }; if ($sto) { - plan tests => 117; + plan tests => 141; } else { plan skip_all => "Can't create temporary test database: $@"; exit 0; @@ -43,6 +43,7 @@ ok($tmptrack); my $admin = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001'); $admin or die "failed to create admin socket: $!"; +my $moga = MogileFS::Admin->new(hosts => [ "127.0.0.1:7001" ]); my $mogc = MogileFS::Client->new( domain => "testdom", hosts => [ "127.0.0.1:7001" ], @@ -377,13 +378,13 @@ use Digest::MD5 qw/md5_hex/; wait_for_monitor($be); } -# use fsck_auto_checksum instead of per-class checksums +# use fsck_checksum=MD5 instead of per-class checksums { my $key = 'lazycksum'; my $info = $mogc->file_info($key); $sto->delete_checksum($info->{fid}); - ok($tmptrack->mogadm("settings", "set", "fsck_auto_checksum", "MD5"), "enable fsck_auto_checksum=MD5"); + ok($tmptrack->mogadm("settings", "set", "fsck_checksum", "MD5"), "enable fsck_checksum=MD5"); wait_for_monitor($be); 
full_fsck($tmptrack); do { @@ -397,7 +398,49 @@ use Digest::MD5 qw/md5_hex/; # ensure server setting is visible use MogileFS::Admin; { - my $moga = MogileFS::Admin->new(hosts => [ "127.0.0.1:7001" ]); my $settings = $moga->server_settings; - is($settings->{fsck_auto_checksum}, 'MD5', "fsck_auto_checksum server setting visible"); + is($settings->{fsck_checksum}, 'MD5', "fsck_checksum server setting visible"); +} + +use MogileFS::Config; + +# disable checksumming entirely, regardless of class setting +{ + %opts = ( domain => "testdom", class => "2copies", + hashtype => "MD5", mindevcount => 2 ); + ok($be->do_request("update_class", \%opts), "update class"); + wait_for_monitor($be); + + ok($tmptrack->mogadm("settings", "set", "fsck_checksum", "off"), "set fsck_checksum=off"); + wait_for_monitor($be); + my $settings = $moga->server_settings; + is($settings->{fsck_checksum}, 'off', "fsck_checksum server setting visible"); + full_fsck($tmptrack); + my $nr; + foreach my $i (0..100) { + $nr = $sto->file_queue_length(FSCK_QUEUE); + last if ($nr eq '0'); + sleep 0.1; + } + is($nr, '0', "fsck finished"); + @fsck_log = $sto->fsck_log_rows; + is(scalar(@fsck_log), 0, "fsck log is empty with fsck_checksum=off"); +} + +# set fsck_checksum=class and ensure that works again +{ + my $info = $mogc->file_info('lazycksum'); + ok($tmptrack->mogadm("settings", "set", "fsck_checksum", "class"), "set fsck_checksum=class"); + wait_for_monitor($be); + my $settings = $moga->server_settings; + ok(! 
defined($settings->{fsck_checksum}), "fsck_checksum=class server setting hidden (default)"); + full_fsck($tmptrack); + + do { + @fsck_log = $sto->fsck_log_rows; + } while (scalar(@fsck_log) == 0 && sleep(0.1)); + + is(scalar(@fsck_log), 1, "fsck log has one row"); + is($fsck_log[0]->{fid}, $info->{fid}, "fid matches in fsck log"); + is($fsck_log[0]->{evcode}, "BSUM", "BSUM logged"); } From a4af4caf2ecb896961fa111797cc2fe5be46aeeb Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 12 Mar 2012 03:01:16 -0700 Subject: [PATCH 198/405] checksums: use a low-priority task queue for fsck digests MD5 is I/O-intensive, and having fsck request MD5s concurrently ends up causing I/O contention on rotational drives with high seek latency. So limit fsck MD5 requests to a single job per device. --- lib/MogileFS/DevFID.pm | 4 +-- lib/MogileFS/HTTPFile.pm | 9 ++++--- lib/MogileFS/Worker/Fsck.pm | 2 +- lib/Mogstored/SideChannelClient.pm | 27 +++++++++++++++---- lib/Mogstored/TaskQueue.pm | 42 ++++++++++++++++++++++++++++++ 5 files changed, 72 insertions(+), 12 deletions(-) create mode 100644 lib/Mogstored/TaskQueue.pm diff --git a/lib/MogileFS/DevFID.pm b/lib/MogileFS/DevFID.pm index 3b6ff94c..f62c0579 100644 --- a/lib/MogileFS/DevFID.pm +++ b/lib/MogileFS/DevFID.pm @@ -72,11 +72,11 @@ sub size_on_disk { # undef on connectivity error, # else checksum of file on disk (after HTTP GET or mogstored read) sub checksum_on_disk { - my ($self, $alg, $ping_cb) = @_; + my ($self, $alg, $ping_cb, $reason) = @_; my $url = $self->get_url; # check that it has size (>0) and is reachable (not undef) - return MogileFS::HTTPFile->at($url)->digest($alg, $ping_cb); + return MogileFS::HTTPFile->at($url)->digest($alg, $ping_cb, $reason); } # returns true if size seen matches fid's length diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index 487d59f7..fa3d7a54 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -139,15 +139,16 @@ sub size { } sub digest_mgmt { - my 
($self, $alg, $ping_cb) = @_; + my ($self, $alg, $ping_cb, $reason) = @_; my $mogconn = $self->host->mogstored_conn; my $node_timeout = MogileFS->config("node_timeout"); my $sock; my $rv; my $expiry; + $reason = defined($reason) ? " $reason" : ""; my $uri = $self->{uri}; - my $req = "$alg $uri\r\n"; + my $req = "$alg $uri$reason\r\n"; my $reqlen = length $req; # a dead/stale socket may not be detected until we try to recv on it @@ -237,8 +238,8 @@ sub digest_http { } sub digest { - my ($self, $alg, $ping_cb) = @_; - my $digest = $self->digest_mgmt($alg, $ping_cb); + my ($self, $alg, $ping_cb, $reason) = @_; + my $digest = $self->digest_mgmt($alg, $ping_cb, $reason); return $digest if ($digest && $digest ne FILE_MISSING); diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index 92071191..b6f5cade 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -365,7 +365,7 @@ sub size_on_disk { sub checksum_on_disk { my ($self, $dfid, $alg, $ping_cb) = @_; return undef if $dfid->device->dstate->is_perm_dead; - return $dfid->checksum_on_disk($alg, $ping_cb); + return $dfid->checksum_on_disk($alg, $ping_cb, "fsck"); } sub bad_checksums_errmsg { diff --git a/lib/Mogstored/SideChannelClient.pm b/lib/Mogstored/SideChannelClient.pm index 4f71a93b..fe900e8f 100644 --- a/lib/Mogstored/SideChannelClient.pm +++ b/lib/Mogstored/SideChannelClient.pm @@ -10,6 +10,10 @@ use fields ( ); use Digest; use POSIX qw(O_RDONLY); +use Mogstored::TaskQueue; + +# TODO: interface to make this tunable +my %digest_queues; # needed since we're pretending to be a Perlbal::Socket... 
never idle out sub max_idle_time { return 0; } @@ -68,14 +72,15 @@ sub read_buf_consume { } $self->watch_read(0); Mogstored->iostat_subscribe($self); - } elsif ($cmd =~ /^(MD5) (\S+)$/) { + } elsif ($cmd =~ /^(MD5) (\S+)(?: (\w+))?$/) { # we can easily enable other hash algorithms with the above # regexp, but we won't for now (see MogileFS::Checksum) my $alg = $1; my $uri = $self->validate_uri($2); + my $reason = $3; return unless defined($uri); - return $self->digest($alg, $path, $uri); + return $self->digest($alg, $path, $uri, $reason); } else { # we don't understand this so pass it on to manage command interface my @out; @@ -117,9 +122,10 @@ sub die_gracefully { } sub digest { - my ($self, $alg, $path, $uri) = @_; + my ($self, $alg, $path, $uri, $reason) = @_; $self->watch_read(0); + Perlbal::AIO::aio_open("$path$uri", O_RDONLY, 0, sub { my $fh = shift; @@ -128,7 +134,16 @@ sub digest { return; } if ($fh) { - $self->digest_fh($alg, $fh, $uri); + my $queue; + + if ($reason && $reason eq "fsck") { + # fstat(2) should return immediately, no AIO needed + my $devid = (stat($fh))[0]; + $queue = $digest_queues{$devid} ||= Mogstored::TaskQueue->new; + $queue->run(sub { $self->digest_fh($alg, $fh, $uri, $queue) }); + } else { + $self->digest_fh($alg, $fh, $uri); + } } else { $self->write("$uri $alg=-1\r\n"); $self->after_long_request; @@ -137,7 +152,7 @@ sub digest { } sub digest_fh { - my ($self, $alg, $fh, $uri) = @_; + my ($self, $alg, $fh, $uri, $queue) = @_; my $offset = 0; my $data = ''; my $digest = Digest->new($alg); @@ -155,11 +170,13 @@ sub digest_fh { CORE::close($fh); $digest = $digest->hexdigest; $self->write("$uri $alg=$digest\r\n"); + $queue->task_done if $queue; $self->after_long_request; } else { $cb = undef; CORE::close($fh); $self->write("ERR read $uri at $offset failed\r\n"); + $queue->task_done if $queue; $self->after_long_request; # should we try to continue? 
} }; diff --git a/lib/Mogstored/TaskQueue.pm b/lib/Mogstored/TaskQueue.pm new file mode 100644 index 00000000..f25a2284 --- /dev/null +++ b/lib/Mogstored/TaskQueue.pm @@ -0,0 +1,42 @@ +# low priority task queue which limits jobs (currently MD5 digest requests) +package Mogstored::TaskQueue; +use fields ( + 'active', # number of active tasks + 'max', # maximum active tasks before deferring to pending + 'pending', # pending code refs for execution + ); + +sub new { + my Mogstored::TaskQueue $self = shift; + $self = fields::new($self) unless ref $self; + $self->{active} = 0; + $self->{max} = 1; + $self->{pending} = []; + $self; +} + +sub run { + my ($self, $task) = @_; + + if ($self->{active} < $self->{max}) { + $self->{active}++; + $task->(); + } else { + push @{$self->{pending}}, $task; + } +} + +sub task_done { + my $self = shift; + + $self->{active}--; + if ($self->{active} < $self->{max}) { + my $task = shift @{$self->{pending}}; + if ($task) { + $self->{active}++; + $task->(); + } + } +} + +1; From e596d524aeda5d4beb3510fba1498dd75231a2bb Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 13 Mar 2012 15:42:35 -0700 Subject: [PATCH 199/405] DevFID size caching for fsck with checksumming The digest path relies on having a known file size to calculate the MD5 timeout, so save an HTTP HEAD request since we always check file sizes in fsck before we checksum the file. 
--- lib/MogileFS/DevFID.pm | 6 ++++-- lib/MogileFS/HTTPFile.pm | 5 +++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/DevFID.pm b/lib/MogileFS/DevFID.pm index f62c0579..e3468cd2 100644 --- a/lib/MogileFS/DevFID.pm +++ b/lib/MogileFS/DevFID.pm @@ -63,9 +63,10 @@ sub vivify_directories { sub size_on_disk { my $self = shift; my $url = $self->get_url; + my $httpfile = $self->{_httpfile_get} ||= MogileFS::HTTPFile->at($url); # check that it has size (>0) and is reachable (not undef) - return MogileFS::HTTPFile->at($url)->size; + return $httpfile->size; } # returns -1 on missing, @@ -74,9 +75,10 @@ sub size_on_disk { sub checksum_on_disk { my ($self, $alg, $ping_cb, $reason) = @_; my $url = $self->get_url; + my $httpfile = $self->{_httpfile_get} ||= MogileFS::HTTPFile->at($url); # check that it has size (>0) and is reachable (not undef) - return MogileFS::HTTPFile->at($url)->digest($alg, $ping_cb, $reason); + return $httpfile->digest($alg, $ping_cb, $reason); } # returns true if size seen matches fid's length diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index fa3d7a54..ce26b36c 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -98,6 +98,9 @@ sub delete { use constant FILE_MISSING => -1; sub size { my $self = shift; + + return $self->{_size} if defined $self->{_size}; + my ($host, $port, $uri, $path) = map { $self->{$_} } qw(host port uri url); return undef if (exists $size_check_retry_after{$host} @@ -118,8 +121,10 @@ sub size { $res->header('server') =~ m/^lighttpd/) { # lighttpd 1.4.x (main release) does not return content-length for # 0 byte files. 
+ $self->{_size} = 0; return 0; } + $self->{_size} = $size; return $size; } else { if ($res->code == 404) { From 1fd5ef4cb501485ee5705da77bae2893088ca93f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 13 Mar 2012 13:24:31 -0700 Subject: [PATCH 200/405] re-enable SHA-1 for checksums Optimized SHA-1 implementations aren't significantly slower than MD5 and some folks (e.g. Tomas Doran) may already have SHA-1 in place for their data. A liberally licensed, GPL-compatible collection of SHA-1 primitives is available from one of the OpenSSL developers: http://www.openssl.org/~appro/cryptogams/ It would be nice to allow the Perl Digest module to transparently take advantage of architecture-specific optimizations. Note there is no standardized equivalent to the HTTP Content-MD5 header/trailer for any of the SHA variants, so verification for replication/uploads may take significantly longer. Requested-by: Tomas Doran --- lib/MogileFS/Checksum.pm | 2 +- lib/Mogstored/SideChannelClient.pm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Checksum.pm b/lib/MogileFS/Checksum.pm index 29a614c7..86b5ae31 100644 --- a/lib/MogileFS/Checksum.pm +++ b/lib/MogileFS/Checksum.pm @@ -5,9 +5,9 @@ use overload '""' => \&as_string; my %TYPE = ( "MD5" => { type => 1, bytelen => 128 / 8 }, + "SHA-1" => { type => 2, bytelen => 160 / 8 }, # see POD for rationale below -# "SHA-1" => { type => 2, bytelen => 160 / 8 }, # "SHA-224" => { type => 3, bytelen => 224 / 8 }, # "SHA-256" => { type => 4, bytelen => 256 / 8 }, # "SHA-384" => { type => 5, bytelen => 384 / 8 }, diff --git a/lib/Mogstored/SideChannelClient.pm b/lib/Mogstored/SideChannelClient.pm index fe900e8f..f11f0e83 100644 --- a/lib/Mogstored/SideChannelClient.pm +++ b/lib/Mogstored/SideChannelClient.pm @@ -72,7 +72,7 @@ sub read_buf_consume { } $self->watch_read(0); Mogstored->iostat_subscribe($self); - } elsif ($cmd =~ /^(MD5) (\S+)(?: (\w+))?$/) { + } elsif ($cmd =~ /^(MD5|SHA-1) (\S+)(?: (\w+))?$/) { 
# we can easily enable other hash algorithms with the above # regexp, but we won't for now (see MogileFS::Checksum) my $alg = $1; From 4937ade9aeb2c80b5d3c3548bfb0a5bf5b3999f9 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 22 Mar 2012 05:55:17 +0000 Subject: [PATCH 201/405] doc/checksums: use $HASHTYPE for referring to hash names $NAME is potentially ambiguous and $HASHTYPE matches the database column name. --- doc/checksums.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/checksums.txt b/doc/checksums.txt index c3a969eb..47be6ed7 100644 --- a/doc/checksums.txt +++ b/doc/checksums.txt @@ -11,7 +11,7 @@ tracker protocol Hex is used instead of binary over the wire, hex is already immune to URL encoding in the tracker protocol. -Hashes are represented with in the $NAME:$HEXDIGEST format: +Hashes are represented with in the $HASHTYPE:$HEXDIGEST format: MD5:68b329da9893e34099c7d8ad5cb9c940 @@ -19,7 +19,7 @@ verifying checksums (on disk) ----------------------------- Ideally, mogstored checksum calculation is done by mogstored and only -the checksum (in $NAME=$HEXDIGEST format) is sent over the wire. +the checksum (in $HASHTYPE=$HEXDIGEST format) is sent over the wire. If mogstored is not available, the checksum is calculated on the tracker by streaming the file with HTTP GET. @@ -30,7 +30,7 @@ create_close (query worker) New optional parameters: - checksumverify=(0|1) default: 0 (false) -- checksum=$NAME:$HEXDIGEST +- checksum=$HASHTYPE:$HEXDIGEST If "checksumverify" is "1" and "checksum" is present, "create_close" will not return until it has verified the checksum. 
From b39694931d6b85301d5ce8dd0ba16b5100d4bcc1 Mon Sep 17 00:00:00 2001 From: dormando Date: Thu, 29 Mar 2012 17:47:21 -0700 Subject: [PATCH 202/405] Add checksum to file_debug output --- lib/MogileFS/Worker/Query.pm | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 44463825..0eeb6ca1 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -584,6 +584,15 @@ sub cmd_file_debug { } $ret->{devids} = join(',', @devids) if @devids; + # Always look for a checksum + my $checksum = Mgd::get_store()->get_checksum($fidid); + if ($checksum) { + $checksum = MogileFS::Checksum->new($checksum); + $ret->{checksum} = $checksum->info; + } else { + $ret->{checksum} = 'NONE'; + } + # Return file row (if found) and all other data. my %toret = (fid => $fid, tempfile => $tfile, replqueue => $repl, delqueue => $del, rebqueue => $reb, fsckqueue => $fsck); From b4cca7408cc7b23a69857b169e48a3fbffe0e67a Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 30 Mar 2012 14:48:09 -0700 Subject: [PATCH 203/405] Fix fsck status when running for the first time Fsck would print "Status: N / 0 " if it's never been started before. Now internally finds the max(fid) on its own. 
--- lib/MogileFS/Worker/Query.pm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 0eeb6ca1..9ff03b4f 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -1479,7 +1479,8 @@ sub cmd_fsck_start { my $intss = sub { MogileFS::Config->server_setting($_[0]) || 0 }; my $checked_fid = $intss->("fsck_highest_fid_checked"); my $final_fid = $intss->("fsck_fid_at_end"); - if ($checked_fid && $final_fid && $checked_fid >= $final_fid) { + if (($checked_fid && $final_fid && $checked_fid >= $final_fid) || + (!$final_fid && !$checked_fid)) { $self->_do_fsck_reset or return $self->err_line; } From f19e09588ea8155f6c1575863605e1d429a46c36 Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 30 Mar 2012 15:03:19 -0700 Subject: [PATCH 204/405] make fsck_checksum == off honored in more places if fsck_checksum was set to off, it would ignore the checksums deep in the code, but would still attempt to "fix" the fids each time, which runs far more code and UPDATE's each fid's devcount even if you tell it not to. now it does what it should. however FSCK with checksums enabled will still UPDATE devcount on each check. --- lib/MogileFS/Worker/Fsck.pm | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index b6f5cade..0e829772 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -156,7 +156,7 @@ sub check_fid { # check the replication policy, which is already done, so finish. return HANDLED if $self->{opt_nostat}; - if ($self->{opt_checksum}) { + if ($self->{opt_checksum} && $self->{opt_checksum} ne "off") { return $fix->(); } @@ -184,7 +184,8 @@ sub check_fid { }); if ($rv) { - return $fid->class->hashtype ? $fix->() : HANDLED; + return ($fid->class->hashtype && !($self->{opt_checksum} && $self->{opt_checksum} eq "off")) + ? 
$fix->() : HANDLED; } elsif ($err eq "stalled") { return STALLED; } elsif ($err eq "needfix") { From d9bb8e86a4373b539a5fde12e4a5714e62f0bda2 Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 30 Mar 2012 15:26:25 -0700 Subject: [PATCH 205/405] Checking in changes prior to tagging of version 2.60. Changelog diff is: diff --git a/CHANGES b/CHANGES index 770e518..5b59d7f 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,12 @@ +2012-03-30: Release version 2.60 + + * Fix fsck status when running for the first time (dormando ) + + * Checksum support (Major update!) (Eric Wong ) + See doc/checksums.txt for an overview of how the new checksum system + works. Also keep an eye on the wiki (http://www.mogilefs.org) for more + complete documentation in the coming weeks. + 2012-02-29: Release version 2.59 * don't make SQLite error out on lock calls (dormando ) --- CHANGES | 9 +++++++++ MANIFEST | 6 ++++++ lib/MogileFS/Server.pm | 2 +- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 770e5188..5b59d7f6 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,12 @@ +2012-03-30: Release version 2.60 + + * Fix fsck status when running for the first time (dormando ) + + * Checksum support (Major update!) (Eric Wong ) + See doc/checksums.txt for an overview of how the new checksum system + works. Also keep an eye on the wiki (http://www.mogilefs.org) for more + complete documentation in the coming weeks. 
+ 2012-02-29: Release version 2.59 * don't make SQLite error out on lock calls (dormando ) diff --git a/MANIFEST b/MANIFEST index d51b2b60..45309c62 100644 --- a/MANIFEST +++ b/MANIFEST @@ -6,6 +6,8 @@ doc/fsck-notes.txt doc/memcache-support.txt doc/pluggable-replication-policies.txt doc/testing.txt +doc/checksums.txt +lib/MogileFS/Checksum.pm lib/MogileFS/Class.pm lib/MogileFS/Config.pm lib/MogileFS/Connection/Client.pm @@ -59,6 +61,7 @@ lib/Mogstored/HTTPServer/Perlbal.pm lib/Mogstored/HTTPServer/None.pm lib/Mogstored/SideChannelClient.pm lib/Mogstored/SideChannelListener.pm +lib/Mogstored/TaskQueue.pm Makefile.PL MANIFEST mogautomount @@ -71,6 +74,9 @@ t/02-host-device.t t/10-weighting.t t/20-filepaths.t t/30-rebalance.t +t/40-httpfile.t +t/50-checksum.t +t/checksum.t t/fid-stat.t t/mogstored-shutdown.t t/multiple-hosts-replpol.t diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 3650b28c..2bc9806b 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.59"; +$VERSION = "2.60"; =head1 NAME From c0dcf7f48b6044e693394d55b86c4c96f4b6c317 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 9 Mar 2012 19:26:40 -0800 Subject: [PATCH 206/405] log: enable autoflush for stdout logging Buffering log output in memory makes it difficult to view debug and error output during development. Since MogileFS does not write to stdout frequently, there should be no noticeable performance loss from this change. This also prevents mangling of TAP output which caused test failures if DEBUG=1 is set. --- lib/MogileFS/Server.pm | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 2bc9806b..b6ce2888 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -267,6 +267,7 @@ sub device_factory { sub log { # simple logging functionality if (! 
$MogileFS::Config::daemonize) { + $| = 1; # syslog acts like printf so we have to use printf and append a \n shift; # ignore the first parameter (info, warn, critical, etc) my $mask = shift; # format string From ea5d78d583723acb4daeeae4803fd77b1167667a Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 14 Apr 2012 00:55:04 +0000 Subject: [PATCH 207/405] worker: delete_domain returns has_classes error I noticed that attempting to delete a domain with classes returns an unhelpful "Operation failed" error message. --- lib/MogileFS/Worker/Query.pm | 1 + t/00-startup.t | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 9ff03b4f..af6625f2 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -857,6 +857,7 @@ sub cmd_delete_domain { my $err = error_code($@); return $self->err_line('domain_has_files') if $err eq "has_files"; + return $self->err_line('domain_has_classes') if $err eq "has_classes"; return $self->err_line("failure"); } diff --git a/t/00-startup.t b/t/00-startup.t index 906817c6..d202ca88 100644 --- a/t/00-startup.t +++ b/t/00-startup.t @@ -24,7 +24,7 @@ find_mogclient_or_skip(); my $sto = eval { temp_store(); }; if ($sto) { - plan tests => 73; + plan tests => 78; } else { plan skip_all => "Can't create temporary test database: $@"; exit 0; @@ -89,6 +89,12 @@ ok($tmptrack->mogadm("domain", "add", "todie"), "created todie domain"); ok($tmptrack->mogadm("domain", "delete", "todie"), "delete todie domain"); ok(!$tmptrack->mogadm("domain", "delete", "todie"), "didn't delete todie domain again"); +ok($tmptrack->mogadm("domain", "add", "hasclass"), "created hasclass domain"); +ok($tmptrack->mogadm("class", "add", "hasclass", "nodel"), "created nodel class"); +ok(!$tmptrack->mogadm("domain", "delete", "hasclass"), "didn't delete hasclass domain"); +ok($tmptrack->mogadm("class", "delete", "hasclass", "nodel"), "created nodel class"); 
+ok($tmptrack->mogadm("domain", "delete", "hasclass"), "didn't delete hasclass domain"); + ok($tmptrack->mogadm("domain", "add", "testdom"), "created test domain"); ok($tmptrack->mogadm("class", "add", "testdom", "1copy", "--mindevcount=1"), "created 1copy class in testdom"); ok($tmptrack->mogadm("class", "add", "testdom", "2copies", "--mindevcount=2"), "created 2copies class in testdom"); From 508a958e7f7c5ede121e6d18476db79b67cdae54 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 14 Apr 2012 00:32:45 +0000 Subject: [PATCH 208/405] monitor: only broadcast reject_bad_md5 on change There's no need to broadcast changes to other workers if there are no changes. Since HTTP servers rarely (if ever) change their ability to toggle Content-MD5 rejection, this was causing needless wakeups in every monitor round. Tested by running mogilefsd with DEBUG=1 and using toggling Content-MD5 rejection in mogstored + perlbal 1.80 via: SET mogstored.enable_md5 = (0|1) to the mgmt port while watching syslog output. Noticed-by: dormando --- lib/MogileFS/Device.pm | 4 ++++ lib/MogileFS/Worker/Monitor.pm | 8 ++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Device.pm b/lib/MogileFS/Device.pm index 1d96beef..a87a24f9 100644 --- a/lib/MogileFS/Device.pm +++ b/lib/MogileFS/Device.pm @@ -301,4 +301,8 @@ sub devices { return Mgd::device_factory()->get_all; } +sub reject_bad_md5 { + return $_[0]->{reject_bad_md5}; +} + 1; diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index e36e7f96..61e32838 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -457,8 +457,12 @@ sub check_bogus_md5 { # most servers /will/ succeed here :< my $resp = $self->ua->request($req); my $rej = $resp->is_success ? 
0 : 1; - debug("dev$devid: reject_bad_md5 = $rej"); - $self->state_event('device', $devid, { reject_bad_md5 => $rej }); + my $prev = $dev->reject_bad_md5; + + if (!defined($prev) || $prev != $rej) { + debug("dev$devid: reject_bad_md5 = $rej"); + $self->state_event('device', $devid, { reject_bad_md5 => $rej }); + } } 1; From 0bdec9b9b49a0f3936b53dc4d4800cc490a5b496 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 18 Apr 2012 13:54:00 -0700 Subject: [PATCH 209/405] tests: add test for fsck functionality Before we make changes to the fsck code, we should ensure we don't break existing use cases. Behavior I'm uncertain about is documented with "XXX". --- t/60-fsck.t | 292 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 292 insertions(+) create mode 100644 t/60-fsck.t diff --git a/t/60-fsck.t b/t/60-fsck.t new file mode 100644 index 00000000..b03e0854 --- /dev/null +++ b/t/60-fsck.t @@ -0,0 +1,292 @@ +# -*-perl-*- +# some of the comments match the comments in MogileFS/Worker/Fsck.pm +# _exactly_ for reference purposes +use strict; +use warnings; +use Test::More; +use FindBin qw($Bin); +use Time::HiRes qw(sleep); +use MogileFS::Server; +use MogileFS::Test; +use HTTP::Request; +find_mogclient_or_skip(); +use MogileFS::Admin; + +my $sto = eval { temp_store(); }; +if (!$sto) { + plan skip_all => "Can't create temporary test database: $@"; + exit 0; +} + +use File::Temp; +my %mogroot; +$mogroot{1} = File::Temp::tempdir( CLEANUP => 1 ); +$mogroot{2} = File::Temp::tempdir( CLEANUP => 1 ); +my $dev2host = { 1 => 1, 2 => 2, }; +foreach (sort { $a <=> $b } keys %$dev2host) { + my $root = $mogroot{$dev2host->{$_}}; + mkdir("$root/dev$_") or die "Failed to create dev$_ dir: $!"; +} + +my $ms1 = create_mogstored("127.0.1.1", $mogroot{1}); +ok($ms1, "got mogstored1"); +my $ms2 = create_mogstored("127.0.1.2", $mogroot{2}); +ok($ms1, "got mogstored2"); + +while (! -e "$mogroot{1}/dev1/usage" && + ! 
-e "$mogroot{2}/dev2/usage") { + print "Waiting on usage...\n"; + sleep(.25); +} + +my $tmptrack = create_temp_tracker($sto); +ok($tmptrack); + +my $admin = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001'); +$admin or die "failed to create admin socket: $!"; +my $moga = MogileFS::Admin->new(hosts => [ "127.0.0.1:7001" ]); +my $mogc = MogileFS::Client->new( + domain => "testdom", + hosts => [ "127.0.0.1:7001" ], + ); +my $be = $mogc->{backend}; # gross, reaching inside of MogileFS::Client + +# test some basic commands to backend +ok($tmptrack->mogadm("domain", "add", "testdom"), "created test domain"); +ok($tmptrack->mogadm("class", "add", "testdom", "2copies", "--mindevcount=2"), "created 2copies class in testdom"); +ok($tmptrack->mogadm("class", "add", "testdom", "1copy", "--mindevcount=1"), "created 1copy class in testdom"); + +ok($tmptrack->mogadm("host", "add", "hostA", "--ip=127.0.1.1", "--status=alive"), "created hostA"); +ok($tmptrack->mogadm("host", "add", "hostB", "--ip=127.0.1.2", "--status=alive"), "created hostB"); + +ok($tmptrack->mogadm("device", "add", "hostA", 1), "created dev1 on hostA"); +ok($tmptrack->mogadm("device", "add", "hostB", 2), "created dev2 on hostB"); + +sub wait_for_monitor { + my $be = shift; + my $was = $be->{timeout}; # can't use local on phash :( + $be->{timeout} = 10; + ok($be->do_request("clear_cache", {}), "waited for monitor") + or die "Failed to wait for monitor"; + ok($be->do_request("clear_cache", {}), "waited for monitor") + or die "Failed to wait for monitor"; + $be->{timeout} = $was; +} + +sub wait_for_empty_queue { + my ($table, $dbh) = @_; + my $limit = 600; + my $delay = 0.1; + my $i = $limit; + my $count; + while ($i > 0) { + $count = $dbh->selectrow_array("SELECT COUNT(*) from $table"); + return if ($count == 0); + sleep $delay; + } + my $time = $delay * $limit; + die "$table is not empty after ${time}s!"; +} + +sub full_fsck { + my ($tmptrack, $dbh) = @_; + + # this should help prevent race conditions: + 
wait_for_empty_queue("file_to_queue", $dbh); + + ok($tmptrack->mogadm("fsck", "stop"), "stop fsck"); + ok($tmptrack->mogadm("fsck", "clearlog"), "clear fsck log"); + ok($tmptrack->mogadm("fsck", "reset"), "reset fsck"); + ok($tmptrack->mogadm("fsck", "start"), "started fsck"); +} + +wait_for_monitor($be); + +my ($req, $rv, %opts, @paths, @fsck_log, $info); +my $ua = LWP::UserAgent->new; +my $key = "testkey"; +my $dbh = $sto->dbh; + +use Data::Dumper; + +# upload a file and wait for replica to appear +{ + my $fh = $mogc->new_file($key, "1copy"); + print $fh "hello\n"; + ok(close($fh), "closed file"); +} + +# first obvious fucked-up case: no devids even presumed to exist. +{ + $info = $mogc->file_info($key); + is($info->{devcount}, 1, "ensure devcount is correct at start"); + + # ensure repl queue is empty before destroying file_on + wait_for_empty_queue("file_to_replicate", $dbh); + + is($dbh->do("DELETE FROM file_on"), 1, "delete $key from file_on table"); + full_fsck($tmptrack, $dbh); + do { + @fsck_log = $sto->fsck_log_rows; + } while (scalar(@fsck_log) < 3 && sleep(0.1)); + + wait_for_empty_queue("file_to_queue", $dbh); + @fsck_log = $sto->fsck_log_rows; + + my $nopa = $fsck_log[0]; + is($nopa->{evcode}, "NOPA", "evcode for no paths logged"); + + # entering "desperate" mode + my $srch = $fsck_log[1]; + is($srch->{evcode}, "SRCH", "evcode for start search logged"); + + # wow, we actually found it! 
+ my $fond = $fsck_log[2]; + is($fond->{evcode}, "FOND", "evcode for start search logged"); + + $info = $mogc->file_info($key); + is($info->{devcount}, 1, "ensure devcount is correct at fsck end"); + @paths = $mogc->get_paths($key); + is(scalar(@paths), 1, "get_paths returns correctly at fsck end"); +} + +# update class to require 2copies and have fsck fix it +{ + @paths = $mogc->get_paths($key); + is(scalar(@paths), 1, "only one path exists before fsck"); + + # _NOT_ using "updateclass" command since that enqueues for replication + my $fid = MogileFS::FID->new($info->{fid}); + my $classid_2copies = $dbh->selectrow_array("SELECT classid FROM class WHERE dmid = ? AND classname = ?", undef, $fid->dmid, "2copies"); + is($fid->update_class(classid => $classid_2copies), 1, "classid updated"); + + full_fsck($tmptrack, $dbh); + + do { + @paths = $mogc->get_paths($key); + } while (scalar(@paths) == 1 and sleep(0.1)); + is(scalar(@paths), 2, "replicated from fsck"); + + $info = $mogc->file_info($key); + is($info->{devcount}, 2, "ensure devcount is updated by replicate"); + + do { + @fsck_log = $sto->fsck_log_rows; + } while (scalar(@fsck_log) == 0 and sleep(10)); + + my $povi = $fsck_log[0]; + is($povi->{evcode}, "POVI", "policy violation logged by fsck"); + + my $repl = $fsck_log[1]; + is($repl->{evcode}, "REPL", "replication request logged by fsck"); +} + +# wrong devcount in file column, but otherwise everything is OK +{ + foreach my $wrong_devcount (13, 0, 1) { + is($dbh->do("UPDATE file SET devcount = ? WHERE fid = ?", undef, $wrong_devcount, $info->{fid}), 1, "set improper devcount"); + + $info = $mogc->file_info($key); + is($info->{devcount}, $wrong_devcount, "devcount is set to $wrong_devcount"); + + full_fsck($tmptrack, $dbh); + + do { + $info = $mogc->file_info($key); + } while ($info->{devcount} == $wrong_devcount && sleep(0.1)); + is($info->{devcount}, 2, "devcount is corrected by fsck"); + + # XXX POVI gets logged here, but BCNT might be more correct... 
+ wait_for_empty_queue("file_to_queue", $dbh); + @fsck_log = $sto->fsck_log_rows; + is($fsck_log[0]->{evcode}, "POVI", "policy violation logged"); + } +} + +# nuke a file from disk but keep the file_on row +{ + @paths = $mogc->get_paths($key); + is(scalar(@paths), 2, "two paths returned from get_paths"); + $rv = $ua->delete($paths[0]); + ok($rv->is_success, "DELETE successful"); + + full_fsck($tmptrack, $dbh); + do { + @fsck_log = $sto->fsck_log_rows; + } while (scalar(@fsck_log) < 2 && sleep(0.1)); + + my $miss = $fsck_log[0]; + is($miss->{evcode}, "MISS", "missing file logged by fsck"); + + my $repl = $fsck_log[1]; + is($repl->{evcode}, "REPL", "replication request logged by fsck"); + + wait_for_empty_queue("file_to_replicate", $dbh); + + @paths = $mogc->get_paths($key); + is(scalar(@paths), 2, "two paths returned from get_paths"); + foreach my $path (@paths) { + $rv = $ua->get($path); + is($rv->content, "hello\n", "GET successful on restored path"); + } +} + +# change the length of a file from disk and have fsck correct it +{ + @paths = $mogc->get_paths($key); + is(scalar(@paths), 2, "two paths returned from get_paths"); + $req = HTTP::Request->new(PUT => $paths[0]); + $req->content("hello\r\n"); + $rv = $ua->request($req); + ok($rv->is_success, "PUT successful"); + + full_fsck($tmptrack, $dbh); + do { + @fsck_log = $sto->fsck_log_rows; + } while (scalar(@fsck_log) < 2 && sleep(0.1)); + + my $blen = $fsck_log[0]; + is($blen->{evcode}, "BLEN", "missing file logged by fsck"); + + my $repl = $fsck_log[1]; + is($repl->{evcode}, "REPL", "replication request logged by fsck"); + + wait_for_empty_queue("file_to_replicate", $dbh); + + @paths = $mogc->get_paths($key); + is(scalar(@paths), 2, "two paths returned from get_paths"); + foreach my $path (@paths) { + $rv = $ua->get($path); + is($rv->content, "hello\n", "GET successful on restored path"); + } +} + +# nuke a file completely and irreparably +{ + @paths = $mogc->get_paths($key); + is(scalar(@paths), 2, "two paths 
returned from get_paths"); + foreach my $path (@paths) { + $rv = $ua->delete($path); + ok($rv->is_success, "DELETE successful"); + } + + full_fsck($tmptrack, $dbh); + do { + @fsck_log = $sto->fsck_log_rows; + } while (scalar(@fsck_log) < 4 && sleep(0.1)); + + is($fsck_log[0]->{evcode}, "MISS", "missing file logged for first path"); + is($fsck_log[1]->{evcode}, "MISS", "missing file logged for second path"); + is($fsck_log[2]->{evcode}, "SRCH", "desperate search attempt logged"); + is($fsck_log[3]->{evcode}, "GONE", "inability to fix FID logged"); + + wait_for_empty_queue("file_to_queue", $dbh); + $info = $mogc->file_info($key); + + # XXX devcount probably needs to be updated on GONE + # is($info->{devcount}, 2, "devcount updated to zero"); + @paths = $mogc->get_paths($key); + is(scalar(@paths), 0, "get_paths returns nothing"); +} + +done_testing(); From 5aa8dc4c97fcd68115ec9dc393daed7097589c5a Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 18 Apr 2012 21:11:43 +0000 Subject: [PATCH 210/405] tests: fix fsck test to work on older LWP::UserAgent The LWP::UserAgent module found in my Debian 6.0 installation does not have a "delete" convenience wrapper. 
--- t/60-fsck.t | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/t/60-fsck.t b/t/60-fsck.t index b03e0854..8b5c5f42 100644 --- a/t/60-fsck.t +++ b/t/60-fsck.t @@ -207,7 +207,8 @@ use Data::Dumper; { @paths = $mogc->get_paths($key); is(scalar(@paths), 2, "two paths returned from get_paths"); - $rv = $ua->delete($paths[0]); + $req = HTTP::Request->new(DELETE => $paths[0]); + $rv = $ua->request($req); ok($rv->is_success, "DELETE successful"); full_fsck($tmptrack, $dbh); @@ -266,7 +267,8 @@ use Data::Dumper; @paths = $mogc->get_paths($key); is(scalar(@paths), 2, "two paths returned from get_paths"); foreach my $path (@paths) { - $rv = $ua->delete($path); + $req = HTTP::Request->new(DELETE => $path); + $rv = $ua->request($req); ok($rv->is_success, "DELETE successful"); } From 644be5043b3143a9f46506b48f3a7ec47c2f55d9 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 22 Apr 2012 02:50:05 +0000 Subject: [PATCH 211/405] add fsck test to MANIFEST --- MANIFEST | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST b/MANIFEST index 45309c62..d2fb4efb 100644 --- a/MANIFEST +++ b/MANIFEST @@ -76,6 +76,7 @@ t/20-filepaths.t t/30-rebalance.t t/40-httpfile.t t/50-checksum.t +t/60-fsck.t t/checksum.t t/fid-stat.t t/mogstored-shutdown.t From 8789f2f89a7192355a33684d425a2af11e16f376 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 22 Apr 2012 03:08:51 +0000 Subject: [PATCH 212/405] tests: fsck test use MogileFS::Store API when possible No point in using DBI directly if a task can be done directly via the MogileFS::Store API. 
Noticed-by: dormando --- t/60-fsck.t | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/t/60-fsck.t b/t/60-fsck.t index 8b5c5f42..0d9bc2b5 100644 --- a/t/60-fsck.t +++ b/t/60-fsck.t @@ -124,7 +124,11 @@ use Data::Dumper; # ensure repl queue is empty before destroying file_on wait_for_empty_queue("file_to_replicate", $dbh); - is($dbh->do("DELETE FROM file_on"), 1, "delete $key from file_on table"); + my @devids = $sto->fid_devids($info->{fid}); + is(scalar(@devids), 1, "devids retrieved"); + is($sto->remove_fidid_from_devid($info->{fid}, $devids[0]), 1, + "delete $key from file_on table"); + full_fsck($tmptrack, $dbh); do { @fsck_log = $sto->fsck_log_rows; @@ -157,7 +161,7 @@ use Data::Dumper; # _NOT_ using "updateclass" command since that enqueues for replication my $fid = MogileFS::FID->new($info->{fid}); - my $classid_2copies = $dbh->selectrow_array("SELECT classid FROM class WHERE dmid = ? AND classname = ?", undef, $fid->dmid, "2copies"); + my $classid_2copies = $sto->get_classid_by_name($fid->dmid, "2copies"); is($fid->update_class(classid => $classid_2copies), 1, "classid updated"); full_fsck($tmptrack, $dbh); From 52550261b32fb74791efba107f3fa2efcc77acf9 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 23 Apr 2012 06:42:36 +0000 Subject: [PATCH 213/405] t/60-fsck.t: fix overly long sleep when waiting for fsck log This was leftover when I was monitoring the test with DEBUG=1 :x --- t/60-fsck.t | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/60-fsck.t b/t/60-fsck.t index 0d9bc2b5..31bdb647 100644 --- a/t/60-fsck.t +++ b/t/60-fsck.t @@ -176,7 +176,7 @@ use Data::Dumper; do { @fsck_log = $sto->fsck_log_rows; - } while (scalar(@fsck_log) == 0 and sleep(10)); + } while (scalar(@fsck_log) == 0 and sleep(0.1)); my $povi = $fsck_log[0]; is($povi->{evcode}, "POVI", "policy violation logged by fsck"); From ccab7b41874e85ad5a450517e7ba8b6f095fe9c3 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 27 Apr 2012 03:01:11 +0000 
Subject: [PATCH 214/405] t/60-fsck: additional test cases * ensure fsck can handle a stall from an unresponsive mogstored * ensure over-replicated files are cleaned up * ensure fsck can work correctly with dead devices if it beats reaper to an FID --- t/60-fsck.t | 164 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 161 insertions(+), 3 deletions(-) diff --git a/t/60-fsck.t b/t/60-fsck.t index 31bdb647..eef41922 100644 --- a/t/60-fsck.t +++ b/t/60-fsck.t @@ -22,7 +22,8 @@ use File::Temp; my %mogroot; $mogroot{1} = File::Temp::tempdir( CLEANUP => 1 ); $mogroot{2} = File::Temp::tempdir( CLEANUP => 1 ); -my $dev2host = { 1 => 1, 2 => 2, }; +$mogroot{3} = File::Temp::tempdir( CLEANUP => 1 ); +my $dev2host = { 1 => 1, 2 => 2, 3 => 2 }; foreach (sort { $a <=> $b } keys %$dev2host) { my $root = $mogroot{$dev2host->{$_}}; mkdir("$root/dev$_") or die "Failed to create dev$_ dir: $!"; @@ -33,8 +34,9 @@ ok($ms1, "got mogstored1"); my $ms2 = create_mogstored("127.0.1.2", $mogroot{2}); ok($ms1, "got mogstored2"); -while (! -e "$mogroot{1}/dev1/usage" && - ! -e "$mogroot{2}/dev2/usage") { +while (! -e "$mogroot{1}/dev1/usage" || + ! -e "$mogroot{2}/dev2/usage" || + ! 
-e "$mogroot{2}/dev3/usage") { print "Waiting on usage...\n"; sleep(.25); } @@ -295,4 +297,160 @@ use Data::Dumper; is(scalar(@paths), 0, "get_paths returns nothing"); } +# upload a file and wait for replica to appear +{ + my $fh = $mogc->new_file($key, "2copies"); + print $fh "hello\n"; + ok(close($fh), "closed file"); + + do { + $info = $mogc->file_info($key); + } while ($info->{devcount} < 2); + is($info->{devcount}, 2, "ensure devcount is correct at start"); +} + +# stall fsck with a non-responsive host +{ + is(kill("STOP", $ms1->pid), 1, "send SIGSTOP to mogstored1"); + wait_for_monitor($be); + + my $delay = 10; + ok($tmptrack->mogadm("fsck", "start"), "started fsck, sleeping ${delay}s"); + sleep $delay; + is($sto->file_queue_length(MogileFS::Config::FSCK_QUEUE), 1, "fsck queue still blocked"); +} + +# resume fsck when host is responsive again +{ + is(kill("CONT", $ms1->pid), 1, "send SIGCONT to mogstored1"); + wait_for_monitor($be); + + # force fsck to wakeup and do work again + my $now = $sto->unix_timestamp; + is($sto->dbh->do("UPDATE file_to_queue SET nexttry = $now"), 1, "unblocked fsck queue"); + ok($admin->syswrite("!to fsck :wake_up\n"), "force fsck to wake up"); + ok($admin->getline, "got wakeup response 1"); + ok($admin->getline, "got wakeup response 2"); + + foreach my $i (1..30) { + last if $sto->file_queue_length(MogileFS::Config::FSCK_QUEUE) == 0; + + sleep 1; + } + + is($sto->file_queue_length(MogileFS::Config::FSCK_QUEUE), 0, "fsck queue emptied"); +} + +# upload a file and wait for replica to appear +{ + my $fh = $mogc->new_file($key, "2copies"); + print $fh "hello\n"; + ok(close($fh), "closed file"); + + do { + $info = $mogc->file_info($key); + } while ($info->{devcount} < 2); + is($info->{devcount}, 2, "ensure devcount is correct at start"); +} + +# stall fsck with a non-responsive host +# resume fsck when host is responsive again +{ + is(kill("STOP", $ms1->pid), 1, "send SIGSTOP to mogstored1 to stall"); + wait_for_monitor($be); + + my 
$delay = 10; + ok($tmptrack->mogadm("fsck", "start"), "started fsck, sleeping ${delay}s"); + sleep $delay; + is($sto->file_queue_length(MogileFS::Config::FSCK_QUEUE), 1, "fsck queue still blocked"); + + is(kill("CONT", $ms1->pid), 1, "send SIGCONT to mogstored1 to resume"); + wait_for_monitor($be); + + # force fsck to wakeup and do work again + my $now = $sto->unix_timestamp; + is($sto->dbh->do("UPDATE file_to_queue SET nexttry = $now"), 1, "unblocked fsck queue"); + ok($admin->syswrite("!to fsck :wake_up\n"), "force fsck to wake up"); + ok($admin->getline, "got wakeup response 1"); + ok($admin->getline, "got wakeup response 2"); + + foreach my $i (1..30) { + last if $sto->file_queue_length(MogileFS::Config::FSCK_QUEUE) == 0; + + sleep 1; + } + + is($sto->file_queue_length(MogileFS::Config::FSCK_QUEUE), 0, "fsck queue emptied"); +} + +# cleanup over-replicated file +{ + $info = $mogc->file_info($key); + is($info->{devcount}, 2, "ensure devcount is correct at start"); + + # _NOT_ using "updateclass" command since that enqueues for replication + my $fid = MogileFS::FID->new($info->{fid}); + my $classid_1copy = $sto->get_classid_by_name($fid->dmid, "1copy"); + is($fid->update_class(classid => $classid_1copy), 1, "classid updated"); + + full_fsck($tmptrack, $dbh); + + sleep(1) while $mogc->file_info($key)->{devcount} == 2; + is($mogc->file_info($key)->{devcount}, 1, "too-happy file cleaned up"); + + @fsck_log = $sto->fsck_log_rows; + is($fsck_log[0]->{evcode}, "POVI", "policy violation logged"); + + # replicator is requested to delete too-happy file + is($fsck_log[1]->{evcode}, "REPL", "replication request logged"); +} + +# kill a device and replace it with another, without help from reaper +# this test may become impossible if monitor + reaper are merged... 
+{ + ok($mogc->update_class($key, "2copies"), "request 2 replicas again"); + do { + $info = $mogc->file_info($key); + } while ($info->{devcount} < 2); + is($info->{devcount}, 2, "ensure devcount is correct at start"); + wait_for_empty_queue("file_to_replicate", $dbh); + + $admin->syswrite("!jobs\n"); + my $reaper_pid; + + while (my $line = $admin->getline) { + last if $line =~ /^\.\r?\n/; + + if ($line =~ /^reaper pids (\d+)\r?\n/) { + $reaper_pid = $1; + } + } + ok($reaper_pid, "got pid of reaper"); + + # reaper is watchdog is 240s, and it wakes up in 5s intervals right now + # so we should be safe from watchdog for now... + ok(kill("STOP", $reaper_pid), "stopped reaper from running"); + + ok($tmptrack->mogadm("device", "mark", "hostB", 2, "dead"), "mark dev2 as dead"); + ok($tmptrack->mogadm("device", "add", "hostB", 3), "created dev3 on hostB"); + wait_for_monitor($be); + + full_fsck($tmptrack, $dbh); + + sleep 1 while MogileFS::Config->server_setting("fsck_host"); + + foreach my $i (1..30) { + last if $sto->file_queue_length(MogileFS::Config::FSCK_QUEUE) == 0; + sleep 1; + } + is($sto->file_queue_length(MogileFS::Config::FSCK_QUEUE), 0, "fsck queue empty"); + + # fsck should've corrected what reaper missed + @fsck_log = $sto->fsck_log_rows; + is(scalar(@fsck_log), 1, "fsck log populated"); + is($fsck_log[0]->{evcode}, "REPL", "replication enqueued"); + + ok(kill("CONT", $reaper_pid), "resumed reaper"); +} + done_testing(); From eba5cadddd8fddcb2b1f21547202cae4501fdbaa Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 24 Apr 2012 01:52:25 +0000 Subject: [PATCH 215/405] fsck: log bad count correctly instead of policy violation A BCNT error is more descriptive than a generic POVI entry and more accurately reflects the change made to an FID entry. This also removes a dependency from /using/ the devcount column and simplifies the code. 
The devcount column remains invaluable and informative to users, but MogileFS should not trust it for making decisions when it has access to the file_on table. --- lib/MogileFS/FID.pm | 5 ----- t/60-fsck.t | 3 +-- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/lib/MogileFS/FID.pm b/lib/MogileFS/FID.pm index 5e32d32e..0e5337b7 100644 --- a/lib/MogileFS/FID.pm +++ b/lib/MogileFS/FID.pm @@ -228,11 +228,6 @@ sub devids_meet_policy { or die "No global device map"; my @devs = $self->devs; - # This is a little heavy handed just to fix the 'devcount' cache, but - # doing it here ensures we get the error logged. - unless (MogileFS::Config->server_setting_cached('skip_devcount') || @devs == $self->devcount) { - return 0; - } my %rep_args = ( fid => $self->id, diff --git a/t/60-fsck.t b/t/60-fsck.t index eef41922..8b18d5cc 100644 --- a/t/60-fsck.t +++ b/t/60-fsck.t @@ -202,10 +202,9 @@ use Data::Dumper; } while ($info->{devcount} == $wrong_devcount && sleep(0.1)); is($info->{devcount}, 2, "devcount is corrected by fsck"); - # XXX POVI gets logged here, but BCNT might be more correct... wait_for_empty_queue("file_to_queue", $dbh); @fsck_log = $sto->fsck_log_rows; - is($fsck_log[0]->{evcode}, "POVI", "policy violation logged"); + is($fsck_log[0]->{evcode}, "BCNT", "bad count logged"); } } From f54abb1b3f1bde865a02fdf654e6caaecef971c5 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 1 May 2012 23:44:43 +0000 Subject: [PATCH 216/405] additional tests for fsck stop, resume and stats We need to ensure fsck can resume and returns sane stats output. 
--- t/60-fsck.t | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/t/60-fsck.t b/t/60-fsck.t index 8b18d5cc..3a2bb007 100644 --- a/t/60-fsck.t +++ b/t/60-fsck.t @@ -452,4 +452,59 @@ use Data::Dumper; ok(kill("CONT", $reaper_pid), "resumed reaper"); } +{ + foreach my $i (1..10) { + my $fh = $mogc->new_file("k$i", "1copy"); + print $fh "$i\n"; + ok(close($fh), "closed file ($i)"); + } + $info = $mogc->file_info("k10"); + + ok($tmptrack->mogadm("settings", "set", "queue_rate_for_fsck", 1), "set queue_rate_for_fsck to 1"); + ok($tmptrack->mogadm("settings", "set", "queue_size_for_fsck", 1), "set queue_size_for_fsck to 1"); + wait_for_monitor($be); + + ok($tmptrack->mogadm("fsck", "start"), "start fsck with slow queue rate"); + + ok(MogileFS::Config->server_setting("fsck_host"), "fsck_host set"); + is(MogileFS::Config->server_setting("fsck_fid_at_end"), $info->{fid}, "fsck_fid_at_end matches"); + + my $highest = undef; + foreach my $i (1..100) { + $highest = MogileFS::Config->server_setting("fsck_highest_fid_checked"); + last if defined $highest; + sleep 0.1; + } + ok($highest, "fsck_highest_fid_checked is set"); + like($highest, qr/\A\d+\z/, "fsck_highest_fid_checked is a digit"); + + # wait for something to get fscked + foreach my $i (1..100) { + last if MogileFS::Config->server_setting("fsck_highest_fid_checked") != $highest; + sleep 0.1; + } + + my $old_highest = $highest; + $highest = MogileFS::Config->server_setting("fsck_highest_fid_checked"); + isnt($highest, $old_highest, "moved to next FID"); + + ok($tmptrack->mogadm("fsck", "stop"), "stop fsck"); + ok(! 
MogileFS::Config->server_setting("fsck_host"), "fsck_host unset"); + is(MogileFS::Config->server_setting("fsck_fid_at_end"), $info->{fid}, "fsck_fid_at_end matches"); + + # resume paused fsck + ok($tmptrack->mogadm("fsck", "start"), "restart fsck"); + $highest = MogileFS::Config->server_setting("fsck_highest_fid_checked"); + cmp_ok($highest, '>=', $old_highest, "fsck resumed without resetting fsck_highest_fid_checked"); + + # wait for something to get fscked + foreach my $i (1..200) { + last if MogileFS::Config->server_setting("fsck_highest_fid_checked") != $highest; + sleep 0.1; + } + + $highest = MogileFS::Config->server_setting("fsck_highest_fid_checked"); + cmp_ok($highest, '>', $old_highest, "fsck continued to higher FID"); +} + done_testing(); From e7b9f1facf1f459726cc8db5df728acf4c7a8b5e Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 3 May 2012 22:39:23 +0000 Subject: [PATCH 217/405] t/60-fsck: retry SQL statements on deadlock Hopefully this can eliminate some random test failures. --- t/60-fsck.t | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/t/60-fsck.t b/t/60-fsck.t index 3a2bb007..6c460bdb 100644 --- a/t/60-fsck.t +++ b/t/60-fsck.t @@ -102,6 +102,13 @@ sub full_fsck { ok($tmptrack->mogadm("fsck", "start"), "started fsck"); } +sub unblock_fsck_queue { + my ($sto, $expect) = @_; + my $now = $sto->unix_timestamp; + my $upd = sub { $sto->dbh->do("UPDATE file_to_queue SET nexttry = $now") }; + is($sto->retry_on_deadlock($upd), $expect, "unblocked fsck queue"); +} + wait_for_monitor($be); my ($req, $rv, %opts, @paths, @fsck_log, $info); @@ -190,7 +197,11 @@ use Data::Dumper; # wrong devcount in file column, but otherwise everything is OK { foreach my $wrong_devcount (13, 0, 1) { - is($dbh->do("UPDATE file SET devcount = ? WHERE fid = ?", undef, $wrong_devcount, $info->{fid}), 1, "set improper devcount"); + my $upd = sub { + $dbh->do("UPDATE file SET devcount = ? 
WHERE fid = ?", + undef, $wrong_devcount, $info->{fid}); + }; + is($sto->retry_on_deadlock($upd), 1, "set improper devcount"); $info = $mogc->file_info($key); is($info->{devcount}, $wrong_devcount, "devcount is set to $wrong_devcount"); @@ -325,8 +336,8 @@ use Data::Dumper; wait_for_monitor($be); # force fsck to wakeup and do work again - my $now = $sto->unix_timestamp; - is($sto->dbh->do("UPDATE file_to_queue SET nexttry = $now"), 1, "unblocked fsck queue"); + unblock_fsck_queue($sto, 1); + ok($admin->syswrite("!to fsck :wake_up\n"), "force fsck to wake up"); ok($admin->getline, "got wakeup response 1"); ok($admin->getline, "got wakeup response 2"); @@ -367,8 +378,7 @@ use Data::Dumper; wait_for_monitor($be); # force fsck to wakeup and do work again - my $now = $sto->unix_timestamp; - is($sto->dbh->do("UPDATE file_to_queue SET nexttry = $now"), 1, "unblocked fsck queue"); + unblock_fsck_queue($sto, 1); ok($admin->syswrite("!to fsck :wake_up\n"), "force fsck to wake up"); ok($admin->getline, "got wakeup response 1"); ok($admin->getline, "got wakeup response 2"); From 2da72edaa583a1dc3547a9f031ce770f0dbd3255 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 4 May 2012 02:51:14 +0000 Subject: [PATCH 218/405] t/60-fsck: allow fsck_highest_fid_checked to be zero After resetting, fsck_highest_fid_checked ends up at zero. 
--- t/60-fsck.t | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/60-fsck.t b/t/60-fsck.t index 6c460bdb..e33b8da5 100644 --- a/t/60-fsck.t +++ b/t/60-fsck.t @@ -485,7 +485,7 @@ use Data::Dumper; last if defined $highest; sleep 0.1; } - ok($highest, "fsck_highest_fid_checked is set"); + ok(defined($highest), "fsck_highest_fid_checked is set"); like($highest, qr/\A\d+\z/, "fsck_highest_fid_checked is a digit"); # wait for something to get fscked From 29bc491831ee71d17a829c1fae63066b8bb34cb8 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 9 May 2012 22:49:38 +0000 Subject: [PATCH 219/405] t/60-fsck: fix typo resulting in useless check We checked the incorrect return value, so the second mogstored failing would've gone unnoticed. --- t/60-fsck.t | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/60-fsck.t b/t/60-fsck.t index e33b8da5..98a7bad7 100644 --- a/t/60-fsck.t +++ b/t/60-fsck.t @@ -32,7 +32,7 @@ foreach (sort { $a <=> $b } keys %$dev2host) { my $ms1 = create_mogstored("127.0.1.1", $mogroot{1}); ok($ms1, "got mogstored1"); my $ms2 = create_mogstored("127.0.1.2", $mogroot{2}); -ok($ms1, "got mogstored2"); +ok($ms2, "got mogstored2"); while (! -e "$mogroot{1}/dev1/usage" || ! -e "$mogroot{2}/dev2/usage" || From db028489e98d87ba88387b8adab10dce97350fd8 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 9 May 2012 22:49:39 +0000 Subject: [PATCH 220/405] t/60-fsck: fix potential race conditions These race conditions were causing this test fail occasionally. These test failures were more common on SQLite and Postgres, but not unheard of when using MySQL. Some of these race conditions were due to fsck/job_master not receiving settings changes in time, so we now resort to killing existing processes and forcing them to reload. 
--- t/60-fsck.t | 66 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 52 insertions(+), 14 deletions(-) diff --git a/t/60-fsck.t b/t/60-fsck.t index 98a7bad7..d79f44e4 100644 --- a/t/60-fsck.t +++ b/t/60-fsck.t @@ -109,6 +109,24 @@ sub unblock_fsck_queue { is($sto->retry_on_deadlock($upd), $expect, "unblocked fsck queue"); } +sub get_worker_pids { + my ($admin, $worker) = @_; + + ok($admin->syswrite("!jobs\n"), "requested jobs"); + my @pids; + + while (my $line = $admin->getline) { + last if $line =~ /^\.\r?\n/; + + if ($line =~ /^$worker pids (\d[\d+\s]*)\r?\n/) { + @pids = split(/\s+/, $1); + } + } + ok(scalar(@pids), "got pid(s) of $worker"); + + return @pids; +} + wait_for_monitor($be); my ($req, $rv, %opts, @paths, @fsck_log, $info); @@ -322,10 +340,18 @@ use Data::Dumper; # stall fsck with a non-responsive host { is(kill("STOP", $ms1->pid), 1, "send SIGSTOP to mogstored1"); - wait_for_monitor($be); + wait_for_monitor($be) foreach (1..3); + + foreach my $worker (qw/job_master fsck/) { + my (@pids) = get_worker_pids($admin, $worker); + is(scalar(@pids), 1, "got one $worker pid"); + ok(kill("KILL", $pids[0]), + "killed $worker to reload observed down state of mogstored1"); + } my $delay = 10; - ok($tmptrack->mogadm("fsck", "start"), "started fsck, sleeping ${delay}s"); + $sto->retry_on_deadlock(sub { $sto->dbh->do("DELETE FROM file_to_queue") }); + full_fsck($tmptrack, $dbh); sleep $delay; is($sto->file_queue_length(MogileFS::Config::FSCK_QUEUE), 1, "fsck queue still blocked"); } @@ -335,6 +361,11 @@ use Data::Dumper; is(kill("CONT", $ms1->pid), 1, "send SIGCONT to mogstored1"); wait_for_monitor($be); + my (@fsck_pids) = get_worker_pids($admin, "fsck"); + is(scalar(@fsck_pids), 1, "got one fsck pid"); + ok(kill("TERM", $fsck_pids[0]), + "kill fsck worker to reload observed alive state of mogstored1"); + # force fsck to wakeup and do work again unblock_fsck_queue($sto, 1); @@ -370,7 +401,7 @@ use Data::Dumper; wait_for_monitor($be); my $delay 
= 10; - ok($tmptrack->mogadm("fsck", "start"), "started fsck, sleeping ${delay}s"); + full_fsck($tmptrack, $dbh); sleep $delay; is($sto->file_queue_length(MogileFS::Config::FSCK_QUEUE), 1, "fsck queue still blocked"); @@ -424,16 +455,9 @@ use Data::Dumper; is($info->{devcount}, 2, "ensure devcount is correct at start"); wait_for_empty_queue("file_to_replicate", $dbh); - $admin->syswrite("!jobs\n"); - my $reaper_pid; - - while (my $line = $admin->getline) { - last if $line =~ /^\.\r?\n/; - - if ($line =~ /^reaper pids (\d+)\r?\n/) { - $reaper_pid = $1; - } - } + my (@reaper_pids) = get_worker_pids($admin, "reaper"); + is(scalar(@reaper_pids), 1, "only one reaper pid"); + my $reaper_pid = $reaper_pids[0]; ok($reaper_pid, "got pid of reaper"); # reaper is watchdog is 240s, and it wakes up in 5s intervals right now @@ -463,6 +487,8 @@ use Data::Dumper; } { + ok($tmptrack->mogadm("fsck", "stop"), "stop fsck"); + foreach my $i (1..10) { my $fh = $mogc->new_file("k$i", "1copy"); print $fh "$i\n"; @@ -472,8 +498,19 @@ use Data::Dumper; ok($tmptrack->mogadm("settings", "set", "queue_rate_for_fsck", 1), "set queue_rate_for_fsck to 1"); ok($tmptrack->mogadm("settings", "set", "queue_size_for_fsck", 1), "set queue_size_for_fsck to 1"); - wait_for_monitor($be); + wait_for_monitor($be) foreach (1..3); + + foreach my $worker (qw/job_master fsck/) { + my (@pids) = get_worker_pids($admin, $worker); + is(scalar(@pids), 1, "got one $worker pid"); + ok(kill("KILL", $pids[0]), + "killed $worker to reload server settings cache"); + } + + ok($tmptrack->mogadm("fsck", "clearlog"), "clear fsck log"); + ok($tmptrack->mogadm("fsck", "reset"), "reset fsck"); + $sto->retry_on_deadlock(sub { $sto->dbh->do("DELETE FROM file_to_queue") }); ok($tmptrack->mogadm("fsck", "start"), "start fsck with slow queue rate"); ok(MogileFS::Config->server_setting("fsck_host"), "fsck_host set"); @@ -487,6 +524,7 @@ use Data::Dumper; } ok(defined($highest), "fsck_highest_fid_checked is set"); like($highest, 
qr/\A\d+\z/, "fsck_highest_fid_checked is a digit"); + isnt($highest, $info->{fid}, "fsck is not running too fast"); # wait for something to get fscked foreach my $i (1..100) { From 3c1c6809d43f10eb9817efd734289e159947fb6d Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 11 May 2012 23:50:00 +0000 Subject: [PATCH 221/405] fsck: update devcount, forget devs on unfixable FIDs Whenever an FID is unfixable, be sure to update devcount (to zero) to easily inform the user via mogstats. If the FID magically reappears in the future, the desperate fallback mode will still find the file. --- lib/MogileFS/Worker/Fsck.pm | 22 +++++++++++++++------- t/60-fsck.t | 3 +-- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index 0e829772..36ca475f 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -303,7 +303,11 @@ sub fix_fid { $check_dfids->("desperate"); # still can't fix it? - return CANT_FIX unless @good_devs; + unless (@good_devs) { + $self->forget_bad_devs($fid, @bad_devs); + $fid->update_devcount; + return CANT_FIX; + } # wow, we actually found it! $fid->fsck_log(EV_FOUND_FID); @@ -313,12 +317,7 @@ sub fix_fid { # wrong, with only one file_on record...) and re-replicate } - # remove the file_on mappings for devices that were bogus/missing. - foreach my $bdev (@bad_devs) { - error("removing file_on mapping for fid=" . $fid->id . ", dev=" . $bdev->id); - $fid->forget_about_device($bdev); - } - + $self->forget_bad_devs($fid, @bad_devs); # in case the devcount or similar was fixed. $fid->want_reload; @@ -450,6 +449,15 @@ sub fix_checksums { } } +# remove the file_on mappings for devices that were bogus/missing. +sub forget_bad_devs { + my ($self, $fid, @bad_devs) = @_; + foreach my $bdev (@bad_devs) { + error("removing file_on mapping for fid=" . $fid->id . ", dev=" . 
$bdev->id); + $fid->forget_about_device($bdev); + } +} + 1; # Local Variables: diff --git a/t/60-fsck.t b/t/60-fsck.t index d79f44e4..95dfefc5 100644 --- a/t/60-fsck.t +++ b/t/60-fsck.t @@ -319,8 +319,7 @@ use Data::Dumper; wait_for_empty_queue("file_to_queue", $dbh); $info = $mogc->file_info($key); - # XXX devcount probably needs to be updated on GONE - # is($info->{devcount}, 2, "devcount updated to zero"); + is($info->{devcount}, 0, "devcount updated to zero"); @paths = $mogc->get_paths($key); is(scalar(@paths), 0, "get_paths returns nothing"); } From 56ff97b6cedb532aa5ffec1c798ce1401d050677 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 12 May 2012 02:54:39 +0000 Subject: [PATCH 222/405] fsck: cleanup and reduce unnecessary devcount updates fix_fid(): we no longer rely blindly update devcount on every call. This is important because we call fix_fid() on checksum checks regardless, and devcount updates entail unnecessary updates to the `file' table. While we're at it, consolidate the places where we check the skip_devcount flag and log bad devcounts. --- lib/MogileFS/Worker/Fsck.pm | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index 36ca475f..82afbc61 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -137,14 +137,9 @@ sub check_fid { } # This is a simple fixup case - unless (MogileFS::Config->server_setting_cached('skip_devcount') || scalar($fid->devids) == $fid->devcount) { - # log a bad count - $fid->fsck_log(EV_BAD_COUNT); - - # TODO: We could fix this without a complete fix pass - # $fid->update_devcount(); - return $fix->(); - } + # If we got here, we already know we have no policy violation and + # don't need to call $fix->() to just fix a devcount + $self->maybe_fix_devcount($fid); # missing checksum row if ($fid->class->hashtype && ! 
$fid->checksum) { @@ -217,9 +212,6 @@ sub fix_fid { my ($self, $fid) = @_; debug(sprintf("Fixing FID %d", $fid->id)); - # This should happen first, since the fid gets awkwardly reloaded... - $fid->update_devcount; - # make devfid objects from the devids that this fid is on, my @dfids = map { MogileFS::DevFID->new($_, $fid) } $fid->devids; @@ -332,10 +324,7 @@ sub fix_fid { } # Clean up the device count if it's wrong - unless(MogileFS::Config->server_setting_cached('skip_devcount') || scalar($fid->devids) == $fid->devcount) { - $fid->update_devcount(); - $fid->fsck_log(EV_BAD_COUNT); - } + $self->maybe_fix_devcount($fid); return HANDLED; } @@ -458,6 +447,17 @@ sub forget_bad_devs { } } +sub maybe_fix_devcount { + # don't even log BCNT errors if skip_devcount is enabled + return if MogileFS::Config->server_setting_cached('skip_devcount'); + + my ($self, $fid) = @_; + return if scalar($fid->devids) == $fid->devcount; + # log a bad count + $fid->fsck_log(EV_BAD_COUNT); + $fid->update_devcount(); +} + 1; # Local Variables: From d51d0bd49b5e3915327e7a6dc8ca830104825690 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 17 May 2012 01:46:24 +0000 Subject: [PATCH 223/405] t/60-fsck: detect fsck stalls via '!watch' Instead of blindly sleeping, we can '!watch' through the tracker and detect the the error message fsck sends. 
--- t/60-fsck.t | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/t/60-fsck.t b/t/60-fsck.t index 95dfefc5..813ef3d5 100644 --- a/t/60-fsck.t +++ b/t/60-fsck.t @@ -75,6 +75,22 @@ sub wait_for_monitor { $be->{timeout} = $was; } +sub watcher_wait_for { + my ($re, $cb) = @_; + my $line; + my $watcher = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001'); + $watcher or die "failed to create watcher socket: $!"; + $watcher->syswrite("!watch\r\n"); + + $cb->(); # usually this is to start fsck + + do { + $line = $watcher->getline; + } until ($line =~ /$re/); + + $watcher->close; +} + sub wait_for_empty_queue { my ($table, $dbh) = @_; my $limit = 600; @@ -348,10 +364,10 @@ use Data::Dumper; "killed $worker to reload observed down state of mogstored1"); } - my $delay = 10; $sto->retry_on_deadlock(sub { $sto->dbh->do("DELETE FROM file_to_queue") }); - full_fsck($tmptrack, $dbh); - sleep $delay; + watcher_wait_for(qr/\[fsck\(\d+\)] Connectivity problem reaching device/, sub { + full_fsck($tmptrack, $dbh); + }); is($sto->file_queue_length(MogileFS::Config::FSCK_QUEUE), 1, "fsck queue still blocked"); } @@ -399,9 +415,9 @@ use Data::Dumper; is(kill("STOP", $ms1->pid), 1, "send SIGSTOP to mogstored1 to stall"); wait_for_monitor($be); - my $delay = 10; - full_fsck($tmptrack, $dbh); - sleep $delay; + watcher_wait_for(qr/\[fsck\(\d+\)] Connectivity problem reaching device/, sub { + full_fsck($tmptrack, $dbh); + }); is($sto->file_queue_length(MogileFS::Config::FSCK_QUEUE), 1, "fsck queue still blocked"); is(kill("CONT", $ms1->pid), 1, "send SIGCONT to mogstored1 to resume"); From fa1a6af3d35da8e1d0c271c3dd0c861cc71c5566 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 17 May 2012 07:05:15 +0000 Subject: [PATCH 224/405] t/60-fsck: use "!to :shutdown" to kill workers This is a simpler implementation and lets us be notified of worker death (and pending replacement) as soon the tracker notices it. 
--- t/60-fsck.t | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/t/60-fsck.t b/t/60-fsck.t index 813ef3d5..58eba2f2 100644 --- a/t/60-fsck.t +++ b/t/60-fsck.t @@ -143,6 +143,16 @@ sub get_worker_pids { return @pids; } +sub shutdown_worker { + my ($admin, $worker) = @_; + + watcher_wait_for(qr/Job $worker has only 0/, sub { + $admin->syswrite("!to $worker :shutdown\r\n"); + like($admin->getline, qr/^Message sent to 1 children/, "tracker sent message to child"); + like($admin->getline, qr/^\./, "tracker ended transmission"); + }); +} + wait_for_monitor($be); my ($req, $rv, %opts, @paths, @fsck_log, $info); @@ -357,12 +367,8 @@ use Data::Dumper; is(kill("STOP", $ms1->pid), 1, "send SIGSTOP to mogstored1"); wait_for_monitor($be) foreach (1..3); - foreach my $worker (qw/job_master fsck/) { - my (@pids) = get_worker_pids($admin, $worker); - is(scalar(@pids), 1, "got one $worker pid"); - ok(kill("KILL", $pids[0]), - "killed $worker to reload observed down state of mogstored1"); - } + shutdown_worker($admin, "job_master"); + shutdown_worker($admin, "fsck"); $sto->retry_on_deadlock(sub { $sto->dbh->do("DELETE FROM file_to_queue") }); watcher_wait_for(qr/\[fsck\(\d+\)] Connectivity problem reaching device/, sub { @@ -376,10 +382,7 @@ use Data::Dumper; is(kill("CONT", $ms1->pid), 1, "send SIGCONT to mogstored1"); wait_for_monitor($be); - my (@fsck_pids) = get_worker_pids($admin, "fsck"); - is(scalar(@fsck_pids), 1, "got one fsck pid"); - ok(kill("TERM", $fsck_pids[0]), - "kill fsck worker to reload observed alive state of mogstored1"); + shutdown_worker($admin, "fsck"); # force fsck to wakeup and do work again unblock_fsck_queue($sto, 1); @@ -516,12 +519,8 @@ use Data::Dumper; wait_for_monitor($be) foreach (1..3); - foreach my $worker (qw/job_master fsck/) { - my (@pids) = get_worker_pids($admin, $worker); - is(scalar(@pids), 1, "got one $worker pid"); - ok(kill("KILL", $pids[0]), - "killed $worker to reload server settings 
cache"); - } + shutdown_worker($admin, "job_master"); + shutdown_worker($admin, "fsck"); ok($tmptrack->mogadm("fsck", "clearlog"), "clear fsck log"); ok($tmptrack->mogadm("fsck", "reset"), "reset fsck"); From 5151d6de495d0ce1cbf46ff38d6fe5a070f42997 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 18 May 2012 01:13:39 +0000 Subject: [PATCH 225/405] fsck: prevent running over 100% completion FIDs may be created while fsck is running, causing "mogadm fsck status" to report completion above 100% (and thus confusing users). Stopping fsck when it reaches fsck_fid_at_end (set to MAX(fid) at fsck startup) prevents this. This change should also have a pleasant side effect of reducing contention with replicate workers on newly-uploaded FIDs. ref: http://code.google.com/p/mogilefs/issues/detail?id=50 --- lib/MogileFS/Store.pm | 11 ++++++----- lib/MogileFS/Worker/JobMaster.pm | 10 +++++++--- t/60-fsck.t | 20 ++++++++++++++++++++ 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 17618203..76932104 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1601,15 +1601,16 @@ sub get_fids_above_id { return @ret; } -# Same as above, but returns unblessed hashref. -sub get_fidids_above_id { - my ($self, $fidid, $limit) = @_; +# gets fidids above fidid_low up to (and including) fidid_high +sub get_fidids_between { + my ($self, $fidid_low, $fidid_high, $limit) = @_; $limit ||= 1000; $limit = int($limit); my $dbh = $self->dbh; - my $fidids = $dbh->selectcol_arrayref(qq{SELECT fid FROM file WHERE fid > ? - ORDER BY fid LIMIT $limit}, undef, $fidid); + my $fidids = $dbh->selectcol_arrayref(qq{SELECT fid FROM file + WHERE fid > ? and fid <= ? 
+ ORDER BY fid LIMIT $limit}, undef, $fidid_low, $fidid_high); return $fidids; } diff --git a/lib/MogileFS/Worker/JobMaster.pm b/lib/MogileFS/Worker/JobMaster.pm index feaa9f85..26bc347c 100644 --- a/lib/MogileFS/Worker/JobMaster.pm +++ b/lib/MogileFS/Worker/JobMaster.pm @@ -148,15 +148,19 @@ sub _inject_fsck_queues { return if ($queue_size >= $max_queue); my $max_checked = MogileFS::Config->server_setting('fsck_highest_fid_checked') || 0; + my $fid_at_end = MogileFS::Config->server_setting('fsck_fid_at_end'); my $to_inject = MogileFS::Config->server_setting_cached('queue_rate_for_fsck') || DEF_FSCK_QUEUE_INJECT; - my $fids = $sto->get_fidids_above_id($max_checked, $to_inject); + my $fids = $sto->get_fidids_between($max_checked, $fid_at_end, $to_inject); unless (@$fids) { - $sto->set_server_setting("fsck_host", undef); - $sto->set_server_setting("fsck_stop_time", $sto->get_db_unixtime); MogileFS::Config->set_server_setting('fsck_highest_fid_checked', $max_checked); + + # set these last since tests/scripts may rely on these to + # determine when fsck (injection) is complete + $sto->set_server_setting("fsck_host", undef); + $sto->set_server_setting("fsck_stop_time", $sto->get_db_unixtime); return; } diff --git a/t/60-fsck.t b/t/60-fsck.t index 58eba2f2..66130ac3 100644 --- a/t/60-fsck.t +++ b/t/60-fsck.t @@ -569,4 +569,24 @@ use Data::Dumper; cmp_ok($highest, '>', $old_highest, "fsck continued to higher FID"); } +# upload new files, but ensure fsck does NOT reach them +{ + my $last_fid = $sto->max_fidid; + + foreach my $i (1..10) { + my $fh = $mogc->new_file("z$i", "1copy"); + print $fh "$i\n"; + ok(close($fh), "closed file (z$i)"); + } + + # crank up fsck speed again + ok($tmptrack->mogadm("settings", "set", "queue_rate_for_fsck", 100), "set queue_rate_for_fsck to 100"); + ok($tmptrack->mogadm("settings", "set", "queue_size_for_fsck", 100), "set queue_size_for_fsck to 100"); + + sleep 0.1 while MogileFS::Config->server_setting("fsck_host"); + + my $highest = 
MogileFS::Config->server_setting("fsck_highest_fid_checked"); + is($highest, $last_fid, "fsck didn't advance beyond what we started with"); +} + done_testing(); From 39dd79696330fe58e7ee32f2bef67f17ea3c66a0 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 12 May 2012 02:59:53 +0000 Subject: [PATCH 226/405] remove redundant code to ignore SIGPIPE MogileFS::Server and Perlbal both ignore SIGPIPE for us. So there's no need to ever ignore it for socket writes in HTTPFile, either. --- MANIFEST | 1 - lib/MogileFS/HTTPFile.pm | 13 +------------ lib/MogileFS/Sys.pm | 13 ------------- 3 files changed, 1 insertion(+), 26 deletions(-) delete mode 100644 lib/MogileFS/Sys.pm diff --git a/MANIFEST b/MANIFEST index d2fb4efb..ca27fdd2 100644 --- a/MANIFEST +++ b/MANIFEST @@ -34,7 +34,6 @@ lib/MogileFS/Store.pm lib/MogileFS/Store/MySQL.pm lib/MogileFS/Store/Postgres.pm lib/MogileFS/Store/SQLite.pm -lib/MogileFS/Sys.pm lib/MogileFS/Test.pm lib/MogileFS/Util.pm lib/MogileFS/Factory.pm diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index ce26b36c..c7b0442a 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -106,10 +106,6 @@ sub size { return undef if (exists $size_check_retry_after{$host} && $size_check_retry_after{$host} > Time::HiRes::time()); - # don't SIGPIPE us - my $flag_nosignal = MogileFS::Sys->flag_nosignal; - local $SIG{'PIPE'} = "IGNORE" unless $flag_nosignal; - my $node_timeout = MogileFS->config("node_timeout"); # Hardcoded connection cache size of 20 :( $user_agent ||= LWP::UserAgent->new(timeout => $node_timeout, keep_alive => 20); @@ -163,12 +159,9 @@ sub digest_mgmt { # assuming the storage node can checksum at >=2MB/s, low expectations here my $response_timeout = $self->size / (2 * 1024 * 1024); - my $flag_nosignal = MogileFS::Sys->flag_nosignal; - local $SIG{'PIPE'} = "IGNORE" unless $flag_nosignal; - retry: $sock = $mogconn->sock($node_timeout) or return; - $rv = send($sock, $req, $flag_nosignal); + $rv = send($sock, $req, 
0); if ($! || $rv != $reqlen) { my $err = $!; $mogconn->mark_dead; @@ -214,10 +207,6 @@ retry: sub digest_http { my ($self, $alg, $ping_cb) = @_; - # don't SIGPIPE us (why don't we just globally ignore SIGPIPE?) - my $flag_nosignal = MogileFS::Sys->flag_nosignal; - local $SIG{'PIPE'} = "IGNORE" unless $flag_nosignal; - # TODO: refactor my $node_timeout = MogileFS->config("node_timeout"); # Hardcoded connection cache size of 20 :( diff --git a/lib/MogileFS/Sys.pm b/lib/MogileFS/Sys.pm deleted file mode 100644 index cf47e336..00000000 --- a/lib/MogileFS/Sys.pm +++ /dev/null @@ -1,13 +0,0 @@ -package MogileFS::Sys; -use strict; -use Socket qw(MSG_NOSIGNAL); -use vars qw($FLAG_NOSIGNAL); - -# used in send() calls to request not to get SIGPIPEd -eval { $FLAG_NOSIGNAL = MSG_NOSIGNAL }; - -sub flag_nosignal { - return $FLAG_NOSIGNAL; -} - -1; From d8b580596c7ac616a821cadfbf4a77a5ad4ca27b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 5 Mar 2012 10:36:28 +0000 Subject: [PATCH 227/405] delete: prevent orphan files from replicator race Use the replicate lock here to prevent an DevFID from being orphaned by a replicate process. This prevents orphaned requests if a user issues a delete request on a file while replication worker is copying it. --- lib/MogileFS/Worker/Delete.pm | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Delete.pm b/lib/MogileFS/Worker/Delete.pm index 77a00e9d..3d65b192 100644 --- a/lib/MogileFS/Worker/Delete.pm +++ b/lib/MogileFS/Worker/Delete.pm @@ -155,6 +155,14 @@ sub process_deletes2 { # load all the devids related to this fid, and delete. my $fid = MogileFS::FID->new($todo->{fid}); my $fidid = $fid->id; + + # if it's currently being replicated, wait for replication to finish + # before deleting to avoid stale files + if (! 
$sto->should_begin_replicating_fidid($fidid)) { + $sto->reschedule_file_to_delete2_relative($fidid, 1); + next; + } + my @devids = $fid->devids; my %devids = map { $_ => 1 } @devids; @@ -241,8 +249,8 @@ sub process_deletes2 { # fid has no pants. unless (keys %devids) { $sto->delete_fid_from_file_to_delete2($fidid); - next; } + $sto->note_done_replicating($fidid); } # did work. From e8d276ab15e6d8a4af0e3d871a05ef3951730699 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 3 May 2012 23:50:15 +0000 Subject: [PATCH 228/405] don't attempt to broadcast undef config values to children This avoids an uninitialized value warning from Perl when choosing a value for the deprecated listen_jobs value. Neither the child nor the parent processes are capable of handling undef values from :set_config_from_*. --- lib/MogileFS/Config.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Config.pm b/lib/MogileFS/Config.pm index 578f7716..122516c0 100644 --- a/lib/MogileFS/Config.pm +++ b/lib/MogileFS/Config.pm @@ -29,7 +29,7 @@ sub set_config { # if a child, propagate to parent if (my $worker = MogileFS::ProcManager->is_child) { $worker->send_to_parent(":set_config_from_child $k $v"); - } else { + } elsif (defined $v) { MogileFS::ProcManager->send_to_all_children(":set_config_from_parent $k $v"); } From 67f95e3515f3ca7304612ecd86197a80362092bb Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 4 May 2012 03:02:31 +0000 Subject: [PATCH 229/405] eliminate dead code for invalid_meta* Most of this was already nuked in the following commits: ebf8a5a8dc9b4452671f7816b99583bf7934e9b1 3db8a84930417ded2d3ed73a2c6bf8aeb8da4edf --- lib/MogileFS/ProcManager.pm | 6 ------ lib/MogileFS/Worker.pm | 6 ------ 2 files changed, 12 deletions(-) diff --git a/lib/MogileFS/ProcManager.pm b/lib/MogileFS/ProcManager.pm index 706b7162..34228fb4 100644 --- a/lib/MogileFS/ProcManager.pm +++ b/lib/MogileFS/ProcManager.pm @@ -696,12 +696,6 @@ sub HandleChildRequest { } elsif ($cmd 
=~ /^:wake_a (\w+)$/) { MogileFS::ProcManager->wake_a($1, $child); - - } elsif ($cmd =~ /^:invalidate_meta (\w+)/) { - - my $what = $1; - MogileFS::ProcManager->send_to_all_children(":invalidate_meta_once $what", $child); - } elsif ($cmd =~ /^:set_config_from_child (\S+) (.+)/) { # and this will rebroadcast it to all other children # (including the one that just set it to us, but eh) diff --git a/lib/MogileFS/Worker.pm b/lib/MogileFS/Worker.pm index 1f00c170..a4e77bdc 100644 --- a/lib/MogileFS/Worker.pm +++ b/lib/MogileFS/Worker.pm @@ -187,12 +187,6 @@ sub parent_ping { } } -sub invalidate_meta { - my ($self, $what) = @_; - return if $Mgd::INVALIDATE_NO_PROPOGATE; # anti recursion - $self->send_to_parent(":invalidate_meta $what"); -} - # tries to parse generic (not job-specific) commands sent from parent # to child. returns 1 on success, or 0 if command given isn't generic, # and child should parse. From 7a8ebda8f144475b45ae52151c6f8c9d030c45a2 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 4 May 2012 03:15:12 +0000 Subject: [PATCH 230/405] store: remove unused random_fids_on_device() sub Unused since commit 0be2f9771a213e92d807f29bead13e6e4203ad54 when the old drain/rebalance code was dropped. --- lib/MogileFS/Store.pm | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 76932104..8b4b81c8 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -2141,25 +2141,6 @@ sub release_lock { sub lock_queue { 1 } sub unlock_queue { 1 } -# returns up to $limit @fidids which are on provided $devid -sub random_fids_on_device { - my ($self, $devid, $limit) = @_; - $limit = int($limit) || 100; - - my $dbh = $self->dbh; - - # FIXME: this blows. not random. and good chances these will - # eventually get to point where they're un-rebalance-able, and we - # never move on past the first 5000 - my @some_fids = List::Util::shuffle(@{ - $dbh->selectcol_arrayref("SELECT fid FROM file_on WHERE devid=? 
LIMIT 5000", - undef, $devid) || [] - }); - - @some_fids = @some_fids[0..$limit-1] if $limit < @some_fids; - return @some_fids; -} - sub BLOB_BIND_TYPE { undef; } sub set_checksum { From b1c80a2aa5649f5983478c52714c640aa2b173f6 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 15 May 2012 23:45:38 +0000 Subject: [PATCH 231/405] monitor: remove unnecessary conditional assignments Based on my reading of the code, the conditional assignments of $hostip, $get_port, $devid, $timeout, and $url are needless. --- lib/MogileFS/Worker/Monitor.pm | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index 61e32838..6b03d8c0 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -336,11 +336,6 @@ sub check_device { my $response = $ua->get($url); my $res_time = Time::HiRes::time(); - $hostip ||= 'unknown'; - $get_port ||= 'unknown'; - $devid ||= 'unknown'; - $timeout ||= 'unknown'; - $url ||= 'unknown'; unless ($response->is_success) { my $failed_after = $res_time - $start_time; if ($failed_after < 0.5) { From 19ab0fa95d0aed5a74b9ffd8bfd628635c9e7f8c Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 3 May 2012 01:46:44 +0000 Subject: [PATCH 232/405] tests: use done_testing() instead of test counts Keeping track of explicit test counts causes needless merge conflicts. done_testing() is sufficient to note test completeness and detect failures. 
--- t/00-startup.t | 6 +++--- t/01-domain-class.t | 6 +++--- t/02-host-device.t | 5 ++--- t/10-weighting.t | 5 ++--- t/20-filepaths.t | 6 +++--- t/30-rebalance.t | 6 +++--- t/40-httpfile.t | 6 +++--- t/50-checksum.t | 6 +++--- t/checksum.t | 6 +++--- t/fid-stat.t | 5 +---- t/mogstored-shutdown.t | 4 ++-- t/multiple-hosts-replpol.t | 3 +-- t/replpolicy-parsing.t | 4 +--- t/store.t | 6 +++--- t/util.t | 4 +--- 15 files changed, 34 insertions(+), 44 deletions(-) diff --git a/t/00-startup.t b/t/00-startup.t index d202ca88..12ac0290 100644 --- a/t/00-startup.t +++ b/t/00-startup.t @@ -23,9 +23,7 @@ find_mogclient_or_skip(); # etc my $sto = eval { temp_store(); }; -if ($sto) { - plan tests => 78; -} else { +if (!$sto) { plan skip_all => "Can't create temporary test database: $@"; exit 0; } @@ -361,3 +359,5 @@ sub try_for { } return 0; } + +done_testing(); diff --git a/t/01-domain-class.t b/t/01-domain-class.t index 8ea0ccd1..3f7f8cd4 100644 --- a/t/01-domain-class.t +++ b/t/01-domain-class.t @@ -17,9 +17,7 @@ use MogileFS::Class; use Data::Dumper qw/Dumper/; my $sto = eval { temp_store(); }; -if ($sto) { - plan tests => 35; -} else { +if (!$sto) { plan skip_all => "Can't create temporary test database: $@"; exit 0; } @@ -158,3 +156,5 @@ ok($domfac != $classfac, "factories are not the same singleton"); 'hashtype' => undef, }, 'class baz came back as boo'); } + +done_testing(); diff --git a/t/02-host-device.t b/t/02-host-device.t index 7a7ab9f2..718c2533 100644 --- a/t/02-host-device.t +++ b/t/02-host-device.t @@ -17,9 +17,7 @@ use MogileFS::Device; use Data::Dumper qw/Dumper/; my $sto = eval { temp_store(); }; -if ($sto) { - plan tests => 21; -} else { +if (!$sto) { plan skip_all => "Can't create temporary test database: $@"; exit 0; } @@ -119,3 +117,4 @@ observed_state => 'writeable'}); }, 'dev2 is as expected'); } +done_testing(); diff --git a/t/10-weighting.t b/t/10-weighting.t index b0057548..01734af2 100644 --- a/t/10-weighting.t +++ b/t/10-weighting.t @@ -26,9 +26,7 
@@ find_mogclient_or_skip(); # etc my $sto = eval { temp_store(); }; -if ($sto) { - plan tests => 19; -} else { +if (!$sto) { plan skip_all => "Can't create temporary test database: $@"; exit 0; } @@ -122,3 +120,4 @@ for (1..100) { ok($stats{1} < 15, "Device 1 should get roughly 5% of traffic, got: $stats{1}"); ok($stats{2} > 80, "Device 2 should get roughly 95% of traffic, got: $stats{2}"); +done_testing(); diff --git a/t/20-filepaths.t b/t/20-filepaths.t index 96cff2d9..e76e1976 100644 --- a/t/20-filepaths.t +++ b/t/20-filepaths.t @@ -27,9 +27,7 @@ plan skip_all => "Filepaths plugin has been separated from the server, a bit of exit 0; my $sto = eval { temp_store(); }; -if ($sto) { - plan tests => 19; -} else { +if (!$sto) { plan skip_all => "Can't create temporary test database: $@"; exit 0; } @@ -138,4 +136,6 @@ my $data = "My test file.\n" x 1024; ok($mogc->filepaths_disable, "Filepaths disabled successfully"); +done_testing(); + # vim: filetype=perl diff --git a/t/30-rebalance.t b/t/30-rebalance.t index 4f19a164..8219269a 100644 --- a/t/30-rebalance.t +++ b/t/30-rebalance.t @@ -12,9 +12,7 @@ use MogileFS::Test; find_mogclient_or_skip(); my $sto = eval { temp_store(); }; -if ($sto) { - plan tests => 48; -} else { +if (!$sto) { plan skip_all => "Can't create temporary test database: $@"; exit 0; } @@ -277,3 +275,5 @@ sub try_for { } return 0; } + +done_testing(); diff --git a/t/40-httpfile.t b/t/40-httpfile.t index 1fe68f25..7fed6d73 100644 --- a/t/40-httpfile.t +++ b/t/40-httpfile.t @@ -12,9 +12,7 @@ use MogileFS::Test; find_mogclient_or_skip(); my $sto = eval { temp_store(); }; -if ($sto) { - plan tests => 16; -} else { +if (!$sto) { plan skip_all => "Can't create temporary test database: $@"; exit 0; } @@ -127,3 +125,5 @@ $file = MogileFS::HTTPFile->at($paths[0]); ok($size == $file->size, "big file size match $size"); ok($file->digest_mgmt('MD5', sub {}) eq $expect, "digest_mgmt('MD5') on big file"); ok($file->digest_http('MD5', sub {}) eq $expect, 
"digest_http('MD5') on big file"); + +done_testing(); diff --git a/t/50-checksum.t b/t/50-checksum.t index a1113f0c..4d9c9c8c 100644 --- a/t/50-checksum.t +++ b/t/50-checksum.t @@ -10,9 +10,7 @@ use HTTP::Request; find_mogclient_or_skip(); my $sto = eval { temp_store(); }; -if ($sto) { - plan tests => 141; -} else { +if (!$sto) { plan skip_all => "Can't create temporary test database: $@"; exit 0; } @@ -444,3 +442,5 @@ use MogileFS::Config; is($fsck_log[0]->{fid}, $info->{fid}, "fid matches in fsck log"); is($fsck_log[0]->{evcode}, "BSUM", "BSUM logged"); } + +done_testing(); diff --git a/t/checksum.t b/t/checksum.t index 3c1c5a4c..9703c62a 100644 --- a/t/checksum.t +++ b/t/checksum.t @@ -11,9 +11,7 @@ use MogileFS::Checksum; use Digest::MD5 qw(md5 md5_hex); my $sto = eval { temp_store(); }; -if ($sto) { - plan tests => 7; -} else { +if (!$sto) { plan skip_all => "Can't create temporary test database: $@"; exit 0; } @@ -34,3 +32,5 @@ my $reloaded = MogileFS::Checksum->new($hash); is("d41d8cd98f00b204e9800998ecf8427e", $reloaded->hexdigest, "hex matches"); my $fid_checksum = MogileFS::FID->new(6)->checksum; is_deeply($fid_checksum, $csum, "MogileFS::FID->checksum works"); + +done_testing(); diff --git a/t/fid-stat.t b/t/fid-stat.t index da603151..89ee7617 100644 --- a/t/fid-stat.t +++ b/t/fid-stat.t @@ -8,8 +8,6 @@ use FindBin qw($Bin); use Mogstored::FIDStatter; use File::Temp qw(tempdir); -plan tests => 11; - my $td = tempdir(CLEANUP => 1); ok($td, "got tempdir"); ok(-d $td, "tempdir is writable"); @@ -119,5 +117,4 @@ sub make_file { close($fh) or die; } - - +done_testing(); diff --git a/t/mogstored-shutdown.t b/t/mogstored-shutdown.t index 6556ba5f..60892ea6 100644 --- a/t/mogstored-shutdown.t +++ b/t/mogstored-shutdown.t @@ -13,8 +13,6 @@ unless ((`netstat -nap --inet` || "") =~ m!PID/Program!) 
{ exit 0; } -plan tests => 4; - my $TEST_IP = '127.0.1.1'; my $rv; @@ -88,3 +86,5 @@ sub try { } return undef; } + +done_testing(); diff --git a/t/multiple-hosts-replpol.t b/t/multiple-hosts-replpol.t index 404fbd2a..dcb6c6f2 100644 --- a/t/multiple-hosts-replpol.t +++ b/t/multiple-hosts-replpol.t @@ -10,8 +10,6 @@ use MogileFS::Util qw(error_code); use MogileFS::ReplicationPolicy::MultipleHosts; use MogileFS::Test; -plan tests => 13; - # already good. is(rr("min=2 h1[d1=X d2=_] h2[d3=X d4=_]"), "all_good", "all good"); @@ -124,3 +122,4 @@ sub rr { return $rr->t_as_string; } +done_testing(); diff --git a/t/replpolicy-parsing.t b/t/replpolicy-parsing.t index 782d918a..6298fdb4 100644 --- a/t/replpolicy-parsing.t +++ b/t/replpolicy-parsing.t @@ -10,8 +10,6 @@ use MogileFS::Server; use MogileFS::Util qw(error_code); use MogileFS::Test; -plan tests => 31; - my $obj; $obj = MogileFS::ReplicationPolicy->new_from_policy_string("MultipleHosts(5)"); @@ -39,4 +37,4 @@ foreach my $str ("Union(MultipleHosts(5), MultipleHosts(2))", isa_ok($obj->{policies}[1], "MogileFS::ReplicationPolicy::MultipleHosts"); } - +done_testing(); diff --git a/t/store.t b/t/store.t index 1a39ffd3..79576133 100644 --- a/t/store.t +++ b/t/store.t @@ -10,9 +10,7 @@ use MogileFS::Util qw(error_code); use MogileFS::Test; my $sto = eval { temp_store(); }; -if ($sto) { - plan tests => 30; -} else { +if (!$sto) { plan skip_all => "Can't create temporary test database: $@"; exit 0; } @@ -193,3 +191,5 @@ ok($hash->{hashtype} == 2, "hashtype set correctly"); ok(1 == $sto->delete_checksum(6), "checksum deleted OK"); ok(0 == $sto->delete_checksum(6), "checksum delete MISS"); ok(!defined $sto->get_checksum(6), "undef on missing checksum"); + +done_testing(); diff --git a/t/util.t b/t/util.t index 76cf0df5..2d80a045 100644 --- a/t/util.t +++ b/t/util.t @@ -5,8 +5,6 @@ use warnings; use Test::More; use MogileFS::Util qw(weighted_list); -plan tests => 1; - my %first; for (1..100) { my @l = weighted_list(["A", 0.1], 
["B", 0.3]); @@ -16,4 +14,4 @@ for (1..100) { # conservative when playing with randomness ok($first{"B"} >= ($first{"A"} * 1.8), "weightest list"); - +done_testing(); From b44b12a0b73775dab17a556b4375e7baf72e434b Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 18 May 2012 12:56:01 -0700 Subject: [PATCH 233/405] Bump Test::More req to get done_testing() may annoy centos5 users. --- Makefile.PL | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile.PL b/Makefile.PL index 8a19f011..4480eb62 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -33,6 +33,7 @@ WriteMakefile( 'IO::AIO' => 0, 'MogileFS::Client' => 0, DBI => 0, + 'Test::More' => 0.94, # 0.94 for done_testing() support }, META_MERGE => { no_index => { From 56206f2fbfb0bd04d7c40541be27af286b77b0aa Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 18 May 2012 01:43:10 +0000 Subject: [PATCH 234/405] store: remove get_fids_above_id() subroutine Unused since commit 6c23c9d3395958d5b6b7f39433012fdf0b53a8b8 ("make fsck worker distributed"). Since this always seemed fsck-specific, it's also unlikely plugins are using this. --- lib/MogileFS/Store.pm | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 8b4b81c8..fede71cb 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1580,27 +1580,6 @@ sub get_fidid_chunks_by_device { return $fidids; } -# takes two arguments, fidid to be above, and optional limit (default -# 1,000). returns up to that that many fidids above the provided -# fidid. returns array of MogileFS::FID objects, sorted by fid ids. -sub get_fids_above_id { - my ($self, $fidid, $limit) = @_; - $limit ||= 1000; - $limit = int($limit); - - my @ret; - my $dbh = $self->dbh; - my $sth = $dbh->prepare("SELECT fid, dmid, dkey, length, classid, devcount ". - "FROM file ". - "WHERE fid > ? ". 
- "ORDER BY fid LIMIT $limit"); - $sth->execute($fidid); - while (my $row = $sth->fetchrow_hashref) { - push @ret, MogileFS::FID->new_from_db_row($row); - } - return @ret; -} - # gets fidids above fidid_low up to (and including) fidid_high sub get_fidids_between { my ($self, $fidid_low, $fidid_high, $limit) = @_; From c744b23db84faad14385b5fd4c7f74b4f31d8b88 Mon Sep 17 00:00:00 2001 From: Gernot Vormayr Date: Tue, 1 May 2012 04:29:47 +0200 Subject: [PATCH 235/405] worker/query: Add optional callid parameter Queries are computed in parallel and therefore replies are not in the right order. This patch adds an optional callid parameter so a caller can match the replies back to the queries. ERR lines return the callid as 3rd parameter. If the callid parameter is missing then the Protocol is the same as before. Example: GET_PATHS callid=1 ERR no_domain No+domain+provided 1 GET_DOMAINS callid=1 OK domains=0&callid=1 --- lib/MogileFS/Worker/Query.pm | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index af6625f2..2db8ced0 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -5,7 +5,7 @@ use strict; use warnings; use base 'MogileFS::Worker'; -use fields qw(querystarttime reqid); +use fields qw(querystarttime reqid callid); use MogileFS::Util qw(error error_code first weighted_list device_state eurl decode_url_args); use MogileFS::HTTPFile; @@ -20,6 +20,7 @@ sub new { $self->{querystarttime} = undef; $self->{reqid} = undef; + $self->{callid} = undef; return $self; } @@ -105,8 +106,9 @@ sub process_line { no strict 'refs'; my $cmd_handler = *{"cmd_$cmd"}{CODE}; + my $args = decode_url_args(\$args); + $self->{callid} = $args->{callid}; if ($cmd_handler) { - my $args = decode_url_args(\$args); local $MogileFS::REQ_altzone = ($args->{zone} && $args->{zone} eq 'alt'); eval { $cmd_handler->($self, $args); @@ -1710,6 +1712,7 @@ sub ok_line { my $id = defined 
$self->{reqid} ? "$self->{reqid} " : ''; my $args = shift || {}; + $args->{callid} = $self->{callid} if defined $self->{callid}; my $argline = join('&', map { eurl($_) . "=" . eurl($args->{$_}) } keys %$args); $self->send_to_parent("${id}${delay}OK $argline"); return 1; @@ -1770,8 +1773,9 @@ sub err_line { } my $id = defined $self->{reqid} ? "$self->{reqid} " : ''; + my $callid = defined $self->{callid} ? ' ' . eurl($self->{callid}) : ''; - $self->send_to_parent("${id}${delay}ERR $err_code " . eurl($err_text)); + $self->send_to_parent("${id}${delay}ERR $err_code " . eurl($err_text) . $callid); return 0; } From 3ba3dd588d7012efe61a24394ad0a4e189d848a7 Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 18 May 2012 13:39:02 -0700 Subject: [PATCH 236/405] fix redefinition error with previous commit --- lib/MogileFS/Worker/Query.pm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 2db8ced0..ea384fe7 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -101,12 +101,12 @@ sub process_line { # fallback to normal command handling if ($line =~ /^(\w+)\s*(.*)/) { - my ($cmd, $args) = ($1, $2); + my ($cmd, $orig_args) = ($1, $2); $cmd = lc($cmd); no strict 'refs'; my $cmd_handler = *{"cmd_$cmd"}{CODE}; - my $args = decode_url_args(\$args); + my $args = decode_url_args(\$orig_args); $self->{callid} = $args->{callid}; if ($cmd_handler) { local $MogileFS::REQ_altzone = ($args->{zone} && $args->{zone} eq 'alt'); From fc8ba6b77590f86b40d9eb4fd6e0b45f7cb9a158 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 1 Mar 2012 11:08:59 +0000 Subject: [PATCH 237/405] sqlite: implement locking via tables This is adapted from the Postgres implementation, but since SQLite runs on one machine, we can use kill 0 to detect if a process is got nuked before it could release a lock. 
--- MANIFEST | 1 + lib/MogileFS/Store/SQLite.pm | 132 +++++++++++++++++++++++++++++++++-- t/store-sqlite.t | 110 +++++++++++++++++++++++++++++ 3 files changed, 237 insertions(+), 6 deletions(-) create mode 100644 t/store-sqlite.t diff --git a/MANIFEST b/MANIFEST index ca27fdd2..5dfff070 100644 --- a/MANIFEST +++ b/MANIFEST @@ -82,6 +82,7 @@ t/mogstored-shutdown.t t/multiple-hosts-replpol.t t/replpolicy-parsing.t t/replpolicy.t +t/store-sqlite.t t/store.t t/util.t TODO diff --git a/lib/MogileFS/Store/SQLite.pm b/lib/MogileFS/Store/SQLite.pm index d1bd6f89..80a4503f 100644 --- a/lib/MogileFS/Store/SQLite.pm +++ b/lib/MogileFS/Store/SQLite.pm @@ -2,6 +2,7 @@ package MogileFS::Store::SQLite; use strict; use warnings; use DBI qw(:sql_types); +use Digest::MD5 qw(md5); # Used for lockid use DBD::SQLite 1.13; use MogileFS::Util qw(throw); use base 'MogileFS::Store'; @@ -14,6 +15,7 @@ use File::Temp (); sub post_dbi_connect { my $self = shift; $self->{dbh}->func(5000, 'busy_timeout'); + $self->{lock_depth} = 0; } sub want_raise_errors { 1 } @@ -33,6 +35,12 @@ sub can_insertignore { 0 } sub can_for_update { 0 } sub unix_timestamp { "strftime('%s','now')" } +sub init { + my $self = shift; + $self->SUPER::init; + $self->{lock_depth} = 0; +} + # DBD::SQLite doesn't really have any table meta info methods # And PRAGMA table_info() does not return "real" rows sub column_type { @@ -48,9 +56,100 @@ sub column_type { return undef; } -# Implement these for native database locking -sub get_lock { 1 } -sub release_lock { 1 } +sub lockid { + my ($lockname) = @_; + croak("Called with empty lockname! 
$lockname") unless (defined $lockname && length($lockname) > 0); + my $num = unpack 'N',md5($lockname); + return ($num & 0x7fffffff); +} + +# returns 1 if the lock holder is still alive, 0 if lock holder died +sub lock_holder_alive { + my ($self, $lockid, $lockname) = @_; + my $max_age = 3600; + + my $dbh = $self->dbh; + my ($hostname, $pid, $acquiredat) = $dbh->selectrow_array('SELECT hostname,pid,acquiredat FROM lock WHERE lockid = ?', undef, $lockid); + + # maybe the lock was _just_ released + return 0 unless defined $pid; + + # weird setups using NFS? can't ping the pid if it's not us + return 1 if $hostname ne MogileFS::Config->hostname; + + # maybe we were unlucky and the PID got recycled + if ($pid == $$) { + if (($acquiredat + $max_age) >= time) { + die("Possible lock recursion inside DB but not process (grabbing $lockname ($lockid, acquiredat=$acquiredat)"); + } else { + debug("lock for $lockname ($lockid,acquiredat=$acquiredat is more than ${max_age}s old, assuming it is stale"); + } + } else { + # ping the process to see if it's alive + return 1 if kill(0, $pid); + } + + # lock holder died, delete the lock and retry immediately + my $rv = $self->retry_on_deadlock(sub { + $dbh->do('DELETE FROM lock WHERE lockid = ? AND pid = ? AND hostname = ?', undef, $lockid, $pid, MogileFS::Config->hostname); + }); + + # if delete can fail if another process just deleted and regrabbed + # this lock + return $rv ? 0 : 1; +} + +# attempt to grab a lock of lockname, and timeout after timeout seconds. +# the lock should be unique in the space of (lockid). We can also detect +# if pid is dead as SQLite only runs on one host. +# returns 1 on success and 0 on timeout +sub get_lock { + my ($self, $lockname, $timeout) = @_; + my $lockid = lockid($lockname); + die "Lock recursion detected (grabbing $lockname ($lockid), had $self->{last_lock} (".lockid($self->{last_lock})."). Bailing out." 
if $self->{lock_depth}; + + debug("$$ Locking $lockname ($lockid)\n") if $Mgd::DEBUG >= 5; + my $dbh = $self->dbh; + my $lock = undef; + my $try = sub { + $dbh->do('INSERT INTO lock (lockid,hostname,pid,acquiredat) VALUES (?, ?, ?, '.$self->unix_timestamp().')', undef, $lockid, MogileFS::Config->hostname, $$); + }; + + while ($timeout >= 0 and not defined($lock)) { + $lock = eval { $self->retry_on_deadlock($try) }; + if ($self->was_duplicate_error) { + # retry immediately if the lock holder died + if ($self->lock_holder_alive($lockid, $lockname)) { + sleep 1 if $timeout > 0; + $timeout--; + } + next; + } + $self->condthrow; + if (defined $lock and $lock == 1) { + $self->{lock_depth} = 1; + $self->{last_lock} = $lockname; + } else { + die "Something went horribly wrong while getting lock $lockname"; + } + } + return $lock; +} + +# attempt to release a lock of lockname. +# returns 1 on success and 0 if no lock we have has that name. +sub release_lock { + my ($self, $lockname) = @_; + my $lockid = lockid($lockname); + debug("$$ Unlocking $lockname ($lockid)\n") if $Mgd::DEBUG >= 5; + my $rv = $self->retry_on_deadlock(sub { + $self->dbh->do('DELETE FROM lock WHERE lockid=? AND pid=? 
AND hostname=?', undef, $lockid, $$, MogileFS::Config->hostname); + }); + debug("Double-release of lock $lockname!") if $self->{lock_depth} != 0 and $rv == 0 and $Mgd::DEBUG >= 2; + $self->condthrow; + $self->{lock_depth} = 0; + return $rv; +} # -------------------------------------------------------------------------- # Store-related things we override @@ -101,6 +200,15 @@ sub table_exists { }; } +sub setup_database { + my $self = shift; + # old installations may not have this, add this without changing + # schema version globally (unless the table itself changes) + $self->add_extra_tables('lock'); + $self->create_table('lock'); + return $self->SUPER::setup_database; +} + # -------------------------------------------------------------------------- # Schema # -------------------------------------------------------------------------- @@ -213,6 +321,16 @@ sub INDEXES_file_to_delete2 { ("CREATE INDEX file_to_delete2_nexttry ON file_to_delete2 (nexttry)"); } +# Extra table +sub TABLE_lock { + "CREATE TABLE lock ( + lockid INT UNSIGNED NOT NULL PRIMARY KEY, + hostname VARCHAR(255) NOT NULL, + pid INT UNSIGNED NOT NULL, + acquiredat INT UNSIGNED NOT NULL + )" +} + sub filter_create_sql { my ($self, $sql) = @_; $sql =~ s/\bENUM\(.+?\)/TEXT/g; @@ -244,15 +362,17 @@ sub upgrade_modify_server_settings_value { 1 } sub upgrade_add_file_to_queue_arg { 1 } sub upgrade_modify_device_size { 1 } -# inefficient, but no warning and no locking sub should_begin_replicating_fidid { my ($self, $fidid) = @_; - return 1; + my $lockname = "mgfs:fid:$fidid:replicate"; + return 1 if $self->get_lock($lockname, 1); + return 0; } -# no locking sub note_done_replicating { my ($self, $fidid) = @_; + my $lockname = "mgfs:fid:$fidid:replicate"; + $self->release_lock($lockname); } sub BLOB_BIND_TYPE { SQL_BLOB } diff --git a/t/store-sqlite.t b/t/store-sqlite.t new file mode 100644 index 00000000..8468d6ac --- /dev/null +++ b/t/store-sqlite.t @@ -0,0 +1,110 @@ +# -*-perl-*- +# tests for 
SQlite-specific features +use strict; +use warnings; +use Test::More; +use FindBin qw($Bin); + +use MogileFS::Server; +use MogileFS::Util qw(error_code); +use MogileFS::Test; +use File::Temp (); +use POSIX qw(:sys_wait_h); + +my ($fh, $filename) = File::Temp::tempfile(); +close($fh); +MogileFS::Config->set_config('db_dsn', "DBI:SQLite:$filename"); +MogileFS::Config->set_config('db_user', ''); +MogileFS::Config->set_config('db_pass', ''); +MogileFS::Config->set_config('max_handles', 0xffffffff); + +my ($r, $w, $pid, $buf); +my $sto = eval { MogileFS::Store->new }; +if ($sto) { + plan tests => 28; +} else { + plan skip_all => "Can't create temporary test database: $@"; + exit 0; +} + +Mgd::set_store($sto); +is(ref($sto), "MogileFS::Store::SQLite", "store is sane"); +is($sto->setup_database, 1, "setup database"); + +is(1, pipe($r, $w), "IPC pipe is ready"); + +# normal lock contention +$pid = fork; +fail("fork failed: $!") unless defined $pid; +if ($pid == 0) { + $sto = Mgd::get_store(); # fork-safe + $SIG{TERM} = sub { + $sto->release_lock("test-lock") == 1 or die "released bad lock"; + exit 0; + }; + $sto->get_lock("test-lock", 1) == 1 or die "child failed to get_lock"; + close($r); + syswrite($w, ".") == 1 or die "child failed to wake parent"; + sleep 60; + exit 0; +} +if ($pid > 0) { + is(sysread($r, $buf, 1), 1, "child wakes us up"); + is($buf, ".", "child wakes parent up properly"); + ok(! $sto->get_lock("test-lock", 1), "fails to lock while child has lock"); + is(kill(TERM => $pid), 1, "kill successful"); + is(waitpid($pid, 0), $pid, "waitpid successful"); + is($?, 0, "child dies correctly"); + is($sto->get_lock("test-lock", 1), 1, "acquire lock when child dies"); +} + +# detects recursive lock +ok(! 
eval { $sto->get_lock("test-lock", 1); }, "recursion fails"); +like($@, qr/Lock recursion detected/i, "proper error on failed lock"); +is($sto->release_lock("test-lock"), 1, "lock release"); + +is($sto->get_lock("test-lock", 0), 1, "acquire lock with 0 timeout"); +is($sto->release_lock("test-lock"), 1, "lock release"); +is($sto->release_lock("test-lock") + 0, 0, "redundant lock release"); + +# waits for lock +$pid = fork; +fail("fork failed: $!") unless defined $pid; +if ($pid == 0) { + $sto = Mgd::get_store(); # fork-safe + $sto->get_lock("test-lock", 1) or die "child failed to get_lock"; + close($r); + syswrite($w, ".") == 1 or die "child failed to wake parent"; + sleep 2; + $sto->release_lock("test-lock") == 1 or die "child failed to release"; + exit 0; +} +if ($pid > 0) { + is(sysread($r, $buf, 1), 1, "parent woken up"); + is($buf, ".", "child wakes parent up properly"); + ok($sto->get_lock("test-lock", 6), "acquire lock eventually"); + is(waitpid($pid, 0), $pid, "waitpid successful"); + is($?, 0, "child dies correctly"); + is($sto->release_lock("test-lock"), 1, "lock release"); +} + + +# kill -9 a lock holder +$pid = fork; +fail("fork failed: $!") unless defined $pid; +if ($pid == 0) { + $sto = Mgd::get_store(); # fork-safe + $sto->get_lock("test-lock", 1) or die "child failed to get_lock"; + close($r); + syswrite($w, ".") == 1 or die "child failed to wake parent"; + sleep 60; + exit 0; +} +if ($pid > 0) { + is(sysread($r, $buf, 1), 1, "parent woken up"); + is($buf, ".", "child wakes parent up properly"); + is(kill(KILL => $pid), 1, "kill -9 successful"); + is(waitpid($pid, 0), $pid, "waitpid successful"); + ok(WIFSIGNALED($?) && WTERMSIG($?) 
== 9, "child was SIGKILL-ed"); + ok($sto->get_lock("test-lock", 1), "acquire lock in parent"); +} From 3e7c94c1faee9e3b8b6fc265eb41d39b1a4600da Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 1 Mar 2012 11:13:11 +0000 Subject: [PATCH 238/405] move common lock code into base Store module Since all Store implementations implement get_lock/release_lock, we can safely share the same implementation of both should_begin_replication_fidid() and note_done_replicating(). --- lib/MogileFS/Store.pm | 7 +++++-- lib/MogileFS/Store/MySQL.pm | 13 ------------- lib/MogileFS/Store/Postgres.pm | 13 ------------- lib/MogileFS/Store/SQLite.pm | 13 ------------- 4 files changed, 5 insertions(+), 41 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index fede71cb..89cc109e 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1754,8 +1754,9 @@ sub grab_files_to_queued { # and tell it not to. sub should_begin_replicating_fidid { my ($self, $fidid) = @_; - warn("Inefficient implementation of should_begin_replicating_fidid() in $self!\n"); - 1; + my $lockname = "mgfs:fid:$fidid:replicate"; + return 1 if $self->get_lock($lockname, 1); + return 0; } # called when replicator is done replicating a fid, so you can cleanup @@ -1769,6 +1770,8 @@ sub should_begin_replicating_fidid { # locking in this pair of functions. 
sub note_done_replicating { my ($self, $fidid) = @_; + my $lockname = "mgfs:fid:$fidid:replicate"; + $self->release_lock($lockname); } sub find_fid_from_file_to_replicate { diff --git a/lib/MogileFS/Store/MySQL.pm b/lib/MogileFS/Store/MySQL.pm index c0d810d7..5cdcfa6a 100644 --- a/lib/MogileFS/Store/MySQL.pm +++ b/lib/MogileFS/Store/MySQL.pm @@ -327,19 +327,6 @@ sub update_devcount_atomic { return 1; } -sub should_begin_replicating_fidid { - my ($self, $fidid) = @_; - my $lockname = "mgfs:fid:$fidid:replicate"; - return 1 if $self->get_lock($lockname, 1); - return 0; -} - -sub note_done_replicating { - my ($self, $fidid) = @_; - my $lockname = "mgfs:fid:$fidid:replicate"; - $self->release_lock($lockname); -} - sub upgrade_add_host_getport { my $self = shift; # see if they have the get port, else update it diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index f0aad0e8..661a4247 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -609,19 +609,6 @@ sub update_devcount_atomic { return $rv; } -sub should_begin_replicating_fidid { - my ($self, $fidid) = @_; - my $lockname = "mgfs:fid:$fidid:replicate"; - return 1 if $self->get_lock($lockname, 1); - return 0; -} - -sub note_done_replicating { - my ($self, $fidid) = @_; - my $lockname = "mgfs:fid:$fidid:replicate"; - $self->release_lock($lockname); -} - # enqueue a fidid for replication, from a specific deviceid (can be undef), in a given number of seconds. 
sub enqueue_for_replication { my ($self, $fidid, $from_devid, $in) = @_; diff --git a/lib/MogileFS/Store/SQLite.pm b/lib/MogileFS/Store/SQLite.pm index 80a4503f..afc776e7 100644 --- a/lib/MogileFS/Store/SQLite.pm +++ b/lib/MogileFS/Store/SQLite.pm @@ -362,19 +362,6 @@ sub upgrade_modify_server_settings_value { 1 } sub upgrade_add_file_to_queue_arg { 1 } sub upgrade_modify_device_size { 1 } -sub should_begin_replicating_fidid { - my ($self, $fidid) = @_; - my $lockname = "mgfs:fid:$fidid:replicate"; - return 1 if $self->get_lock($lockname, 1); - return 0; -} - -sub note_done_replicating { - my ($self, $fidid) = @_; - my $lockname = "mgfs:fid:$fidid:replicate"; - $self->release_lock($lockname); -} - sub BLOB_BIND_TYPE { SQL_BLOB } 1; From 4ff90dc8380cdb2413109ccd6676e1a282787634 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 18 May 2012 20:10:29 +0000 Subject: [PATCH 239/405] sqlite: delete expired locks regardless of hostname For rare SQLite setups, drop locks after 3600s regardless of the hostname of the lock holder. This can work around weird setups that change hostnames (frequently) or share SQLite DBs over NFS. --- lib/MogileFS/Store/SQLite.pm | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/lib/MogileFS/Store/SQLite.pm b/lib/MogileFS/Store/SQLite.pm index afc776e7..1545ff26 100644 --- a/lib/MogileFS/Store/SQLite.pm +++ b/lib/MogileFS/Store/SQLite.pm @@ -67,6 +67,7 @@ sub lockid { sub lock_holder_alive { my ($self, $lockid, $lockname) = @_; my $max_age = 3600; + my $force_unlock; my $dbh = $self->dbh; my ($hostname, $pid, $acquiredat) = $dbh->selectrow_array('SELECT hostname,pid,acquiredat FROM lock WHERE lockid = ?', undef, $lockid); @@ -74,28 +75,29 @@ sub lock_holder_alive { # maybe the lock was _just_ released return 0 unless defined $pid; - # weird setups using NFS? 
can't ping the pid if it's not us - return 1 if $hostname ne MogileFS::Config->hostname; - - # maybe we were unlucky and the PID got recycled - if ($pid == $$) { - if (($acquiredat + $max_age) >= time) { + # if the lock is too old, don't check anything else + if (($acquiredat + $max_age) < time) { + $force_unlock = 1; + } elsif ($hostname eq MogileFS::Config->hostname) { + # maybe we were unlucky and the PID got recycled + if ($pid == $$) { die("Possible lock recursion inside DB but not process (grabbing $lockname ($lockid, acquiredat=$acquiredat)"); - } else { - debug("lock for $lockname ($lockid,acquiredat=$acquiredat is more than ${max_age}s old, assuming it is stale"); } - } else { - # ping the process to see if it's alive + + # don't force the lock if the process is still alive return 1 if kill(0, $pid); + + $force_unlock = 1; } - # lock holder died, delete the lock and retry immediately + return 0 unless $force_unlock; + + # lock holder is dead or the lock is too old: kill the lock my $rv = $self->retry_on_deadlock(sub { - $dbh->do('DELETE FROM lock WHERE lockid = ? AND pid = ? AND hostname = ?', undef, $lockid, $pid, MogileFS::Config->hostname); + $dbh->do('DELETE FROM lock WHERE lockid = ? AND pid = ? AND hostname = ?', undef, $lockid, $pid, $hostname); }); - # if delete can fail if another process just deleted and regrabbed - # this lock + # delete can fail if another process just deleted and regrabbed this lock return $rv ? 0 : 1; } From f54e6fd1076e0c71e5ab9b9dfa1bf01a7960e0e8 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 10 May 2012 00:55:59 +0000 Subject: [PATCH 240/405] avoid unnecessary devcount update in create_close The devcount of a newly uploaded file is always 1, so we do not need another set of trips to the DB to set this in the file row. 
--- lib/MogileFS/Store.pm | 6 +++--- lib/MogileFS/Store/Postgres.pm | 10 +++++----- lib/MogileFS/Worker/Query.pm | 22 +++++++++------------- 3 files changed, 17 insertions(+), 21 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 89cc109e..5a228520 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1269,12 +1269,12 @@ sub delete_and_return_tempfile_row { sub replace_into_file { my $self = shift; - my %arg = $self->_valid_params([qw(fidid dmid key length classid)], @_); + my %arg = $self->_valid_params([qw(fidid dmid key length classid devcount)], @_); die "Your database does not support REPLACE! Reimplement replace_into_file!" unless $self->can_replace; eval { $self->dbh->do("REPLACE INTO file (fid, dmid, dkey, length, classid, devcount) ". - "VALUES (?,?,?,?,?,0) ", undef, - @arg{'fidid', 'dmid', 'key', 'length', 'classid'}); + "VALUES (?,?,?,?,?,?) ", undef, + @arg{'fidid', 'dmid', 'key', 'length', 'classid', 'devcount'}); }; $self->condthrow; } diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index 661a4247..2e626c1d 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -741,12 +741,12 @@ sub delete_fidid { sub replace_into_file { my $self = shift; - my %arg = $self->_valid_params([qw(fidid dmid key length classid)], @_); + my %arg = $self->_valid_params([qw(fidid dmid key length classid devcount)], @_); $self->insert_or_update( - insert => "INSERT INTO file (fid, dmid, dkey, length, classid, devcount) VALUES (?, ?, ?, ?, ?, 0)", - insert_vals => [ @arg{'fidid', 'dmid', 'key', 'length', 'classid'} ], - update => "UPDATE file SET dmid=?, dkey=?, length=?, classid=?, devcount=0 WHERE fid=?", - update_vals => [ @arg{'dmid', 'key', 'length', 'classid', 'fidid'} ], + insert => "INSERT INTO file (fid, dmid, dkey, length, classid, devcount) VALUES (?, ?, ?, ?, ?, ?)", + insert_vals => [ @arg{'fidid', 'dmid', 'key', 'length', 'classid', 'devcount'} ], + update => "UPDATE 
file SET dmid=?, dkey=?, length=?, classid=?, devcount=? WHERE fid=?", + update_vals => [ @arg{'dmid', 'key', 'length', 'classid', 'fidid', 'devcount'} ], ); $self->condthrow; } diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index ea384fe7..3242131d 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -455,25 +455,21 @@ sub cmd_create_close { key => $key, length => $size, classid => $trow->{classid}, + devcount => 1, ); # mark it as needing replicating: $fid->enqueue_for_replication(); - if ($fid->update_devcount) { - # call the hook - if this fails, we need to back the file out - my $rv = MogileFS::run_global_hook('file_stored', $args); - if (defined $rv && ! $rv) { # undef = no hooks, 1 = success, 0 = failure - $fid->delete; - return $self->err_line("plugin_aborted"); - } - - # all went well - return $self->ok_line; - } else { - # FIXME: handle this better - return $self->err_line("db_error"); + # call the hook - if this fails, we need to back the file out + my $rv = MogileFS::run_global_hook('file_stored', $args); + if (defined $rv && ! $rv) { # undef = no hooks, 1 = success, 0 = failure + $fid->delete; + return $self->err_line("plugin_aborted"); } + + # all went well, we would've hit condthrow on DB errors + return $self->ok_line; } sub cmd_updateclass { From 560539025a292b28bb4bf1bff324746b34472ee7 Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 18 May 2012 19:01:35 -0700 Subject: [PATCH 241/405] fix issue #57 by Pyry and Eric Specifying "alivetypo" as the host status would cause mogilefs to implode. 
--- lib/MogileFS/Host.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Host.pm b/lib/MogileFS/Host.pm index eb2b55dd..3af2f915 100644 --- a/lib/MogileFS/Host.pm +++ b/lib/MogileFS/Host.pm @@ -33,7 +33,7 @@ sub new_from_args { sub valid_state { my ($class, $state) = @_; - return $state && $state =~ /^alive|dead|down$/; + return $state && $state =~ /\A(?:alive|dead|down)\z/; } # Instance methods: From 5f48f8d7166ae0cc068e2b90a3ea89ac1a43c48e Mon Sep 17 00:00:00 2001 From: dormando Date: Tue, 15 May 2012 16:08:32 -0700 Subject: [PATCH 242/405] size arg check from tomas doran --- lib/MogileFS/Worker/Query.pm | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 3242131d..5f8f41fa 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -215,6 +215,8 @@ sub cmd_create_open { my $dmid = $args->{dmid}; my $key = $args->{key} || ""; my $multi = $args->{multi_dest} ? 1 : 0; + my $size = $args->{size} || undef; # Size is optional at create time, + # but used to grep devices if available # optional profiling of stages, if $args->{debug_profile} my @profpoints; # array of [point,hires-starttime] @@ -253,6 +255,10 @@ sub cmd_create_open { @devices = sort_devs_by_freespace(Mgd::device_factory()->get_all); } + if ($size) { + @devices = grep { ($_->mb_free * 1024*1024) > $size } @devices; + } + # find suitable device(s) to put this file on. 
my @dests; # MogileFS::Device objects which are suitable From 3f8fffd0c1a01e8ac9fd41d51a45e61009e6fdbf Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 18 May 2012 19:25:26 -0700 Subject: [PATCH 243/405] drop Test::More to 0.88 - don't need subtests --- Makefile.PL | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.PL b/Makefile.PL index 4480eb62..f136b115 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -33,7 +33,7 @@ WriteMakefile( 'IO::AIO' => 0, 'MogileFS::Client' => 0, DBI => 0, - 'Test::More' => 0.94, # 0.94 for done_testing() support + 'Test::More' => 0.88, # 0.88 for done_testing() support }, META_MERGE => { no_index => { From 27a51c259e86ec60ca46458de2dc192b010ec1bd Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 19 May 2012 05:31:32 +0000 Subject: [PATCH 244/405] t/50-checksum: /possibly/ fix a stuck test This possible fix could also be hiding another bug, but the original test ordering was suspect... --- t/50-checksum.t | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/t/50-checksum.t b/t/50-checksum.t index 4d9c9c8c..e32487b7 100644 --- a/t/50-checksum.t +++ b/t/50-checksum.t @@ -144,6 +144,10 @@ use Digest::MD5 qw/md5_hex/; # save new row to checksum table { my $key = 'savecksum'; + + syswrite($admin, "!want 0 replicate\n"); # disable replication + ok(<$admin> =~ /Now desiring/ && <$admin> eq ".\r\n", "disabled replicate"); + %opts = ( domain => "testdom", class => "2copies", key => $key ); $rv = $be->do_request("create_open", \%opts); %opts = %$rv; @@ -163,9 +167,21 @@ use Digest::MD5 qw/md5_hex/; my $info = $mogc->file_info($key); ok($info, "file_info($key) is sane"); is($info->{checksum}, "MD5:".md5_hex('blah'), 'checksum shows up'); - $sto->delete_checksum($info->{fid}); + is($sto->delete_checksum($info->{fid}), 1, "$key checksum row deleted"); $info = $mogc->file_info($key); is($info->{checksum}, "MISSING", 'checksum is MISSING after delete'); + + syswrite($admin, "!want 1 
replicate\n"); # disable replication + ok(<$admin> =~ /Now desiring/ && <$admin> eq ".\r\n", "enabled replicate"); + + # wait for replicate to recreate checksum + do { + @paths = $mogc->get_paths($key); + } while (scalar(@paths) == 1 and sleep(0.1)); + is(scalar(@paths), 2, "replicate successfully with good checksum"); + + $info = $mogc->file_info($key); + is($info->{checksum}, "MD5:".md5_hex('blah'), 'checksum recreated on repl'); } # flip checksum classes around @@ -189,16 +205,6 @@ use Digest::MD5 qw/md5_hex/; is($classes[0]->{hashtype}, undef, "hashtype unset"); } -# wait for replicate to verify existing (valid) checksum -{ - my $key = 'savecksum'; - - do { - @paths = $mogc->get_paths($key); - } while (scalar(@paths) == 1 and sleep(0.1)); - is(scalar(@paths), 2, "replicate successfully with good checksum"); -} - # save checksum on replicate, client didn't care to provide one { my $key = 'lazycksum'; From 7b058a978ed9e8ade8a756b13d9cf83f2623d134 Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 18 May 2012 22:59:13 -0700 Subject: [PATCH 245/405] Checking in changes prior to tagging of version 2.61. 
Changelog diff is: diff --git a/CHANGES b/CHANGES index 5b59d7f..de3ba9b 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,33 @@ +2012-05-18: Release version 2.61 + + * fix issue #57 by Pyry and Eric (dormando ) + (mogadm host status sometimes allowed typos) + + * avoid unnecessary devcount update in create_close (Eric Wong ) + + * sqlite: implement locking via tables (Eric Wong ) + + * worker/query: Add optional callid parameter (Gernot Vormayr ) + (allows command pipelining) + + * delete: prevent orphan files from replicator race (Eric Wong ) + + * fsck: prevent running over 100% completion (Eric Wong ) + + * fsck: cleanup and reduce unnecessary devcount updates (Eric Wong ) + + * fsck: update devcount, forget devs on unfixable FIDs (Eric Wong ) + + * fsck: log bad count correctly instead of policy violation (Eric Wong ) + + * tests: add test for fsck functionality (Eric Wong ) + + * monitor: only broadcast reject_bad_md5 on change (Eric Wong ) + + * worker: delete_domain returns has_classes error (Eric Wong ) + + * log: enable autoflush for stdout logging (Eric Wong ) + 2012-03-30: Release version 2.60 * Fix fsck status when running for the first time (dormando ) --- CHANGES | 30 ++++++++++++++++++++++++++++++ lib/MogileFS/Server.pm | 2 +- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 5b59d7f6..de3ba9b4 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,33 @@ +2012-05-18: Release version 2.61 + + * fix issue #57 by Pyry and Eric (dormando ) + (mogadm host status sometimes allowed typos) + + * avoid unnecessary devcount update in create_close (Eric Wong ) + + * sqlite: implement locking via tables (Eric Wong ) + + * worker/query: Add optional callid parameter (Gernot Vormayr ) + (allows command pipelining) + + * delete: prevent orphan files from replicator race (Eric Wong ) + + * fsck: prevent running over 100% completion (Eric Wong ) + + * fsck: cleanup and reduce unnecessary devcount updates (Eric Wong ) + + * fsck: update 
devcount, forget devs on unfixable FIDs (Eric Wong ) + + * fsck: log bad count correctly instead of policy violation (Eric Wong ) + + * tests: add test for fsck functionality (Eric Wong ) + + * monitor: only broadcast reject_bad_md5 on change (Eric Wong ) + + * worker: delete_domain returns has_classes error (Eric Wong ) + + * log: enable autoflush for stdout logging (Eric Wong ) + 2012-03-30: Release version 2.60 * Fix fsck status when running for the first time (dormando ) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index b6ce2888..6813dcbc 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.60"; +$VERSION = "2.61"; =head1 NAME From 28fad964f2eee3f4b13614cce97e5423ebd54875 Mon Sep 17 00:00:00 2001 From: dormando Date: Sat, 19 May 2012 13:02:58 -0700 Subject: [PATCH 246/405] dangling reference to MogileFS::Sys removed --- lib/MogileFS/Server.pm | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 6813dcbc..2f0066b5 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -30,7 +30,6 @@ use List::Util; use Socket qw(SO_KEEPALIVE); use MogileFS::Util qw(daemonize); -use MogileFS::Sys; use MogileFS::Config; use MogileFS::ProcManager; From b643f33caeba94f668c6127a90c72b56a013d9ea Mon Sep 17 00:00:00 2001 From: "Robin H. Johnson" Date: Sat, 19 May 2012 19:59:52 +0000 Subject: [PATCH 247/405] Postgres: Fix v15 schema upgrade. Schema upgrade needs to use Pg-specific column types for the v15 upgrade adding class.hashtype. Only CREATE TABLE is auto-converted where possible, not ALTER TABLE. Signed-off-by: Robin H. 
Johnson --- lib/MogileFS/Store/Postgres.pm | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index 2e626c1d..dc6b9ade 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -310,6 +310,13 @@ sub upgrade_modify_device_size { return 1; } +sub upgrade_add_class_hashtype { + my ($self) = @_; + unless ($self->column_type("class", "hashtype")) { + $self->dowell("ALTER TABLE class ADD COLUMN hashtype SMALLINT"); + } +} + # return 1 on success. die otherwise. sub enqueue_fids_to_delete { # My kingdom for a real INSERT IGNORE implementation! From 0d83c44007e4101e44b64b57f2351266c31376a0 Mon Sep 17 00:00:00 2001 From: dormando Date: Sat, 19 May 2012 13:09:09 -0700 Subject: [PATCH 248/405] fix CHANGES for 2.60 :P --- CHANGES | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index de3ba9b4..b526b6af 100644 --- a/CHANGES +++ b/CHANGES @@ -5,6 +5,10 @@ * avoid unnecessary devcount update in create_close (Eric Wong ) + * size arg check from tomas doran (dormando ) + (pass in a "size" argument (in bytes) to create_open and mogilefs will + ensure there's enough free space for it) + * sqlite: implement locking via tables (Eric Wong ) * worker/query: Add optional callid parameter (Gernot Vormayr ) @@ -20,7 +24,7 @@ * fsck: log bad count correctly instead of policy violation (Eric Wong ) - * tests: add test for fsck functionality (Eric Wong ) + * tests: add tests for fsck functionality (Eric Wong ) * monitor: only broadcast reject_bad_md5 on change (Eric Wong ) From bfdfc423221f9889eb643f17ff3c1a10e79a28dc Mon Sep 17 00:00:00 2001 From: dormando Date: Sat, 19 May 2012 13:55:15 -0700 Subject: [PATCH 249/405] Checking in changes prior to tagging of version 2.62. 
Changelog diff is: diff --git a/CHANGES b/CHANGES index b526b6a..6333f9f 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,9 @@ +2012-05-19: Release version 2.62 + + * Critical bugfix for a compilation error (dormando, reported by Robin) + + * Fix for upgrading a Postgres install for checksums (Robin * H. Johnson ) + 2012-05-18: Release version 2.61 * fix issue #57 by Pyry and Eric (dormando ) --- CHANGES | 6 ++++++ lib/MogileFS/Server.pm | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index b526b6af..6333f9f2 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,9 @@ +2012-05-19: Release version 2.62 + + * Critical bugfix for a compilation error (dormando, reported by Robin) + + * Fix for upgrading a Postgres install for checksums (Robin * H. Johnson ) + 2012-05-18: Release version 2.61 * fix issue #57 by Pyry and Eric (dormando ) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 2f0066b5..0aea2576 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.61"; +$VERSION = "2.62"; =head1 NAME From ac5534a0c3d046e660fa7581c9173857f182bd81 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 26 May 2012 23:15:40 +0000 Subject: [PATCH 250/405] postgres: fix replace_into_file regression in 2.61 commit f54e6fd1076e0c71e5ab9b9dfa1bf01a7960e0e8 botched the ordering of parameters when updating the file table. 
--- lib/MogileFS/Store/Postgres.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index dc6b9ade..ebfed22d 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -753,7 +753,7 @@ sub replace_into_file { insert => "INSERT INTO file (fid, dmid, dkey, length, classid, devcount) VALUES (?, ?, ?, ?, ?, ?)", insert_vals => [ @arg{'fidid', 'dmid', 'key', 'length', 'classid', 'devcount'} ], update => "UPDATE file SET dmid=?, dkey=?, length=?, classid=?, devcount=? WHERE fid=?", - update_vals => [ @arg{'dmid', 'key', 'length', 'classid', 'fidid', 'devcount'} ], + update_vals => [ @arg{'dmid', 'key', 'length', 'classid', 'devcount', 'fidid'} ], ); $self->condthrow; } From 768f03b0740c7a1c3a9cf4d927063c651500a4b1 Mon Sep 17 00:00:00 2001 From: dormando Date: Tue, 29 May 2012 20:48:40 -0700 Subject: [PATCH 251/405] Checking in changes prior to tagging of version 2.63. Changelog diff is: diff --git a/CHANGES b/CHANGES index 6333f9f..07790f3 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,8 @@ +2012-05-29: Release version 2.63 + + * Critical bugfix for Postgres users introduced by 2.61. New file uploads + would fail. (noticed by robin H. Johnson, fixed by Eric Wong) + 2012-05-19: Release version 2.62 * Critical bugfix for a compilation error (dormando, reported by Robin) --- CHANGES | 5 +++++ lib/MogileFS/Server.pm | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 6333f9f2..07790f3e 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,8 @@ +2012-05-29: Release version 2.63 + + * Critical bugfix for Postgres users introduced by 2.61. New file uploads + would fail. (noticed by robin H. 
Johnson, fixed by Eric Wong) + 2012-05-19: Release version 2.62 * Critical bugfix for a compilation error (dormando, reported by Robin) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 0aea2576..1c5240d7 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.62"; +$VERSION = "2.63"; =head1 NAME From 4fe092fbca118e33f368055dca49c7b6caf9fe8b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 31 May 2012 19:32:03 -0700 Subject: [PATCH 252/405] monitor skips hosts marked dead or down Host don't define readability/writability themselves, so the Host::should_get_new_files sub is renamed to "alive" and Device->can_read_from respects host status. Also, queryworker now skips down/dead hosts in cmd_get_paths. ref: http://code.google.com/p/mogilefs/issues/detail?id=46 --- lib/MogileFS/Device.pm | 4 ++-- lib/MogileFS/Host.pm | 2 +- lib/MogileFS/Worker/Monitor.pm | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/MogileFS/Device.pm b/lib/MogileFS/Device.pm index a87a24f9..3cbb1854 100644 --- a/lib/MogileFS/Device.pm +++ b/lib/MogileFS/Device.pm @@ -120,7 +120,7 @@ sub can_delete_from { } sub can_read_from { - return $_[0]->dstate->can_read_from; + return $_[0]->host->alive && $_[0]->dstate->can_read_from; } # FIXME: Is there a (unrelated to this code) bug where new files aren't tested @@ -131,7 +131,7 @@ sub should_get_new_files { return 0 unless $dstate->should_get_new_files; return 0 unless $self->observed_writeable; - return 0 unless $self->host->should_get_new_files; + return 0 unless $self->host->alive; # have enough disk space? 
(default: 100MB) my $min_free = MogileFS->config("min_free_space"); return 0 if $self->{mb_total} && diff --git a/lib/MogileFS/Host.pm b/lib/MogileFS/Host.pm index 3af2f915..49964bc1 100644 --- a/lib/MogileFS/Host.pm +++ b/lib/MogileFS/Host.pm @@ -70,7 +70,7 @@ sub observed_fields { return $_[0]->fields(@observed_fields); } -sub should_get_new_files { +sub alive { return $_[0]->status eq 'alive'; } diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index 6b03d8c0..fabe0218 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -97,7 +97,7 @@ sub usage_refresh { $cur_iow->{$dev->id} = $self->{devutil}->{cur}->{$dev->id}; next if $self->{skip_host}{$dev->hostid}; $self->check_device($dev, $have_dbh, $updateable_devices) - if $dev->dstate->should_monitor; + if $dev->can_read_from; $self->still_alive; # Ping parent if needed so we don't time out # given lots of devices. } From c84760f47c3eefa3f2a7be598f7ccc799ebd1051 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 1 Jun 2012 16:37:10 -0700 Subject: [PATCH 253/405] Device->observed_* all respects observed host state The observed unreachable state of the host should always supercede the observed state of the device. This is already the case with observed_writeable, but not with observed_readable nor observed_unreachable. The monitor worker does not (and should not, to save bandwidth) update states of all devices when a host goes down. 
--- lib/MogileFS/Device.pm | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/lib/MogileFS/Device.pm b/lib/MogileFS/Device.pm index 3cbb1854..08da9e98 100644 --- a/lib/MogileFS/Device.pm +++ b/lib/MogileFS/Device.pm @@ -89,21 +89,27 @@ sub observed_utilization { return $self->{utilization}; } +sub host_ok { + my $host = $_[0]->host; + return ($host && $host->observed_reachable); +} + sub observed_writeable { my $self = shift; - return 0 unless $self->{observed_state} && $self->{observed_state} eq 'writeable'; - my $host = $self->host or return 0; - return 0 unless $host->observed_reachable; - return 1; + return 0 unless $self->host_ok; + return $self->{observed_state} && $self->{observed_state} eq 'writeable'; } sub observed_readable { my $self = shift; + return 0 unless $self->host_ok; return $self->{observed_state} && $self->{observed_state} eq 'readable'; } sub observed_unreachable { my $self = shift; + # host is unreachability implies device unreachability + return 1 unless $self->host_ok; return $self->{observed_state} && $self->{observed_state} eq 'unreachable'; } From 2dd9a1bf8a49ed3aaaf39c34b7c6453892dd3f26 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 1 Jun 2012 17:22:18 -0700 Subject: [PATCH 254/405] Device->should_read_from respects all host/device states should_read_from() should replace all uses of can_read_from() in non-Monitor workers. This avoids the overhead of needlessly rechecking devices either the monitor or user marked down. This simplifies queryworker logic a bit. 
--- lib/MogileFS/Device.pm | 6 ++++++ lib/MogileFS/Worker/Query.pm | 15 ++++----------- lib/MogileFS/Worker/Replicate.pm | 6 +++--- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/lib/MogileFS/Device.pm b/lib/MogileFS/Device.pm index 08da9e98..f1a7bc17 100644 --- a/lib/MogileFS/Device.pm +++ b/lib/MogileFS/Device.pm @@ -125,10 +125,16 @@ sub can_delete_from { return $_[0]->dstate->can_delete_from; } +# this method is for Monitor, other workers should use should_read_from sub can_read_from { return $_[0]->host->alive && $_[0]->dstate->can_read_from; } +# this is the only method a worker should call for checking for readability +sub should_read_from { + return $_[0]->can_read_from && ($_[0]->observed_readable || $_[0]->observed_writeable); +} + # FIXME: Is there a (unrelated to this code) bug where new files aren't tested # against the free space limit before being stored or replicated somewhere? sub should_get_new_files { diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 5f8f41fa..0075cc8a 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -1145,16 +1145,13 @@ sub cmd_get_paths { # construct result paths foreach my $dev (@sorted_devs) { - next unless $dev && ($dev->can_read_from); + next unless $dev && $dev->host; - my $host = $dev->host; - next unless $dev && $host; my $dfid = MogileFS::DevFID->new($dev, $fid); my $path = $dfid->get_url; - my $currently_down = - $host->observed_unreachable || $dev->observed_unreachable; + my $currently_up = $dev->should_read_from; - if ($currently_down) { + if (! $currently_up) { $backup_path = $path; next; } @@ -1309,12 +1306,8 @@ sub cmd_edit_file { @list = grep { my $devid = $_; my $dev = $dmap->{$devid}; - my $host = $dev ? 
$dev->host : undef; - $dev - && $host - && $dev->can_read_from - && !($host->observed_unreachable || $dev->observed_unreachable); + $dev && $dev->should_read_from; } @list; # Take first remaining device from list diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index 98981e90..3641710a 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -170,10 +170,10 @@ sub replicate_using_torepl_table { # First one we can delete from, we try to rebalance away from. for (@devs) { my $dev = Mgd::device_factory()->get_by_id($_); - # Not positive 'can_read_from' needs to be here. + # Not positive 'should_read_from' needs to be here. # We must be able to delete off of this dev so the fid can # move. - if ($dev->can_delete_from && $dev->can_read_from) { + if ($dev->can_delete_from && $dev->should_read_from) { $devfid = MogileFS::DevFID->new($dev, $f); last; } @@ -373,7 +373,7 @@ sub replicate { if ($d->dstate->should_have_files && ! $mask_devids->{$devid}) { push @on_devs_tellpol, $d; } - if ($d->dstate->can_read_from) { + if ($d->should_read_from) { push @on_up_devid, $devid; } } From d0de5c5649a4cbe99d2a2cb48c0c4d3a7c440898 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 1 Jun 2012 17:28:04 -0700 Subject: [PATCH 255/405] DevFID size/checksum respects Device->should_read_from Avoid needlessly attempting connections for checking files on host/devices the monitor (or user) marked as unreadable. This also makes the Fsck->size_on_disk function redundant. 
--- lib/MogileFS/DevFID.pm | 6 ++++++ lib/MogileFS/Worker/Fsck.pm | 14 ++------------ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/lib/MogileFS/DevFID.pm b/lib/MogileFS/DevFID.pm index e3468cd2..aa0c6cce 100644 --- a/lib/MogileFS/DevFID.pm +++ b/lib/MogileFS/DevFID.pm @@ -62,6 +62,9 @@ sub vivify_directories { # else size of file on disk (after HTTP HEAD or mogstored stat) sub size_on_disk { my $self = shift; + + return undef unless $self->device->should_read_from; + my $url = $self->get_url; my $httpfile = $self->{_httpfile_get} ||= MogileFS::HTTPFile->at($url); @@ -74,6 +77,9 @@ sub size_on_disk { # else checksum of file on disk (after HTTP GET or mogstored read) sub checksum_on_disk { my ($self, $alg, $ping_cb, $reason) = @_; + + return undef unless $self->device->should_read_from; + my $url = $self->get_url; my $httpfile = $self->{_httpfile_get} ||= MogileFS::HTTPFile->at($url); diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index 82afbc61..879df679 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -195,7 +195,7 @@ sub parallel_check_sizes { # serial, for now: (just prepping for future parallel future, # getting interface right) foreach my $df (@$dflist) { - my $size = $self->size_on_disk($df); + my $size = $df->size_on_disk; return 0 unless $cb->($df, $size); } return 1; @@ -238,7 +238,7 @@ sub fix_fid { next; } - my $disk_size = $self->size_on_disk($dfid); + my $disk_size = $dfid->size_on_disk; die "dev " . $dev->id . 
" unreachable" unless defined $disk_size; if ($disk_size == $fid->length) { @@ -339,21 +339,11 @@ sub forget_file_on_with_bad_checksums { } } -# returns 0 on missing, -# undef on connectivity error, -# else size of file on disk (after HTTP HEAD or mogstored stat) -sub size_on_disk { - my ($self, $dfid) = @_; - return undef if $dfid->device->dstate->is_perm_dead; - return $dfid->size_on_disk; -} - # returns -1 on missing, # undef on connectivity error, # else checksum of file on disk (after HTTP GET or mogstored read) sub checksum_on_disk { my ($self, $dfid, $alg, $ping_cb) = @_; - return undef if $dfid->device->dstate->is_perm_dead; return $dfid->checksum_on_disk($alg, $ping_cb, "fsck"); } From ae7b4152933cf6ddfd34e67032fcecffea0820d7 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 1 Jun 2012 18:00:04 -0700 Subject: [PATCH 256/405] get_paths: deprioritize devs in "drain" state URLs pointing to devices set to drain are undesirable. Files may disappear off draining devices immediately after we've queried the file_on table and invalidate the paths the client sends us. --- lib/MogileFS/Worker/Query.pm | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 0075cc8a..4fb3638d 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -1143,6 +1143,9 @@ sub cmd_get_paths { # keep one partially-bogus path around just in case we have nothing else to send. my $backup_path; + # files on devices set for drain may disappear soon. + my @drain_paths; + # construct result paths foreach my $dev (@sorted_devs) { next unless $dev && $dev->host; @@ -1162,11 +1165,26 @@ sub cmd_get_paths { $args->{noverify} || $dfid->size_matches; + if ($dev->dstate->should_drain) { + push @drain_paths, $path; + next; + } + my $n = ++$ret->{paths}; $ret->{"path$n"} = $path; last if $n == $pathcount; # one verified, one likely seems enough for now. time will tell. 
} + # deprioritize devices set for drain, they could disappear soon... + # Clients /should/ try to use lower-numbered paths first to avoid this. + if ($ret->{paths} < $pathcount && @drain_paths) { + foreach my $path (@drain_paths) { + my $n = ++$ret->{paths}; + $ret->{"path$n"} = $path; + last if $n == $pathcount; + } + } + # use our backup path if all else fails if ($backup_path && ! $ret->{paths}) { $ret->{paths} = 1; From 1386dc3b6c8943c0edb7aa790eceadd4b20a27ce Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 30 May 2012 04:30:49 +0000 Subject: [PATCH 257/405] t/00-startup: explicit fid test for create_open/close This is mainly to prevent bugs like the fix in commit ac5534a0c3d046e660fa7581c9173857f182bd81 from popping up again. --- t/00-startup.t | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/t/00-startup.t b/t/00-startup.t index 12ac0290..5aba5b9d 100644 --- a/t/00-startup.t +++ b/t/00-startup.t @@ -351,6 +351,22 @@ foreach my $t (qw(file file_on file_to_delete)) { # TODO: test double closing, etc. } +# give an explicit fid, to prevent bugs like the reason behind +# commit ac5534a0c3d046e660fa7581c9173857f182bd81 +# This is functionality is a bad idea otherwise. +{ + my $expfid = 2147483632; + my $opts = { fid => $expfid }; + my $fh = $mogc->new_file("explicit_fid", "1copy", 2, $opts); + die "Error: " . 
$mogc->errstr unless $fh; + ok((print $fh "hi" ), "wrote 2 bytes"); + ok(close($fh), "closed file"); + my $info = $mogc->file_info("explicit_fid"); + + is($info->{devcount}, 1, "devcount is 1"); + is($info->{fid}, $opts->{fid}, "explicit fid is correctly set"); +} + sub try_for { my ($tries, $code) = @_; for (1..$tries) { From 62b3bdb07551708aa469fb5d575830d69bbde7ae Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 22 May 2012 18:52:02 +0000 Subject: [PATCH 258/405] connection/mogstored: remove sock_if_connected() This subroutine has been unused since MogileFS 2.52 commit 18a40d29236a8a00593f914191c9fd16cda082db ("Throw out old HTTPFile->size code and use LWP") --- lib/MogileFS/Connection/Mogstored.pm | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lib/MogileFS/Connection/Mogstored.pm b/lib/MogileFS/Connection/Mogstored.pm index 46bd2bcf..c4ca5fd8 100644 --- a/lib/MogileFS/Connection/Mogstored.pm +++ b/lib/MogileFS/Connection/Mogstored.pm @@ -23,11 +23,6 @@ sub sock { return $self->{sock}; } -sub sock_if_connected { - my $self = shift; - return $self->{sock}; -} - sub mark_dead { my $self = shift; $self->{sock} = undef; From 42b2992434da29691ad53bc07d1a75e7e3242105 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 16 Jan 2012 10:02:14 +0000 Subject: [PATCH 259/405] test for existing case-insensitive list_keys behavior We cannot break existing case-insensitive behavior for list_keys right now, even if it's broken. This means SQLite/MySQL will use case-insensitive LIKE statements for list_keys and Postgres remains case-sensitive. 
--- t/store.t | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/t/store.t b/t/store.t index 79576133..96310185 100644 --- a/t/store.t +++ b/t/store.t @@ -192,4 +192,26 @@ ok(1 == $sto->delete_checksum(6), "checksum deleted OK"); ok(0 == $sto->delete_checksum(6), "checksum delete MISS"); ok(!defined $sto->get_checksum(6), "undef on missing checksum"); +# case-sensitivity tests for list_keys +my %arg = ( + fidid => 1234, + dmid => $dmid, + key => 'Case_Sensitive_Clod', + length => 1, + classid => $clsid, + devcount => 1 +); +$sto->replace_into_file(%arg); +my $rows; + +# ensure existing (broken) case-insensitive list_keys works for MySQL/SQLite +# LIKE is always case-sensitive in Postgres, so its behavior for list_keys +# was never broken. +$rows = $sto->get_keys_like($dmid, "case", undef, 1000); +if (ref($sto) eq "MogileFS::Store::Postgres") { + ok(scalar @$rows == 0, "Postgres list_keys is case-sensitive"); +} else { + ok($rows->[0] eq 'Case_Sensitive_Clod', "list_keys matches insensitively"); +} + done_testing(); From 12fa6886747751e298be6ecc508c504eeeb0fdb8 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 16 Jan 2012 10:02:14 +0000 Subject: [PATCH 260/405] implement "case_sensitive_list_keys" server setting Enabling this boolean will make the "after" and "prefix" params of "list_keys" behave case-sensitively. If this setting is /not/ enabled, clients will hit after_mismatch errors when iterating through keys if they are using an uppercase "prefix" argument and a subsequent list_keys is called with an "after" that only matches case-insensitively. If unset, this defaults to false (0) to match historical (buggy) behavior. Historical behavior is preserved (even if broken) as users with small namespaces may rely on case-insensitive matching. Postgres users are not affected by this change, as the LIKE operator in Postgres is always case-sensitive. This change is tested on all three databases: Postgres, MySQL, and SQLite. 
--- lib/MogileFS/Config.pm | 1 + lib/MogileFS/Store.pm | 6 +++++- lib/MogileFS/Store/MySQL.pm | 5 +++++ lib/MogileFS/Store/SQLite.pm | 9 +++++++++ t/store.t | 21 +++++++++++++++++++++ 5 files changed, 41 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Config.pm b/lib/MogileFS/Config.pm index 122516c0..b3d02b16 100644 --- a/lib/MogileFS/Config.pm +++ b/lib/MogileFS/Config.pm @@ -368,6 +368,7 @@ sub server_setting_is_writable { if ($key eq "enable_rebalance") { return $bool }; if ($key eq "skip_devcount") { return $bool }; if ($key eq "skip_mkcol") { return $bool }; + if ($key eq "case_sensitive_list_keys") { return $bool }; if ($key eq "memcache_servers") { return $any }; if ($key eq "memcache_ttl") { return $num }; if ($key eq "internal_queue_limit") { return $num }; diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 5a228520..51a1a0f1 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1858,12 +1858,16 @@ sub get_keys_like { $prefix .= '%'; $after = '' unless defined $after; + my $like = $self->get_keys_like_operator; + # now select out our keys return $self->dbh->selectcol_arrayref - ('SELECT dkey FROM file WHERE dmid = ? AND dkey LIKE ? AND dkey > ? ' . + ("SELECT dkey FROM file WHERE dmid = ? AND dkey $like ? AND dkey > ? " . "ORDER BY dkey LIMIT $limit", undef, $dmid, $prefix, $after); } +sub get_keys_like_operator { return "LIKE"; } + # return arrayref of all tempfile rows (themselves also arrayrefs, of [$fidid, $devids]) # that were created $secs_ago seconds ago or older. sub old_tempfiles { diff --git a/lib/MogileFS/Store/MySQL.pm b/lib/MogileFS/Store/MySQL.pm index 5cdcfa6a..32a5a8b5 100644 --- a/lib/MogileFS/Store/MySQL.pm +++ b/lib/MogileFS/Store/MySQL.pm @@ -432,6 +432,11 @@ sub pre_daemonize_checks { return $self->SUPER::pre_daemonize_checks(); } +sub get_keys_like_operator { + my $bool = MogileFS::Config->server_setting_cached('case_sensitive_list_keys'); + return $bool ? "LIKE /*! 
BINARY */" : "LIKE"; +} + 1; __END__ diff --git a/lib/MogileFS/Store/SQLite.pm b/lib/MogileFS/Store/SQLite.pm index 1545ff26..157d3776 100644 --- a/lib/MogileFS/Store/SQLite.pm +++ b/lib/MogileFS/Store/SQLite.pm @@ -366,6 +366,15 @@ sub upgrade_modify_device_size { 1 } sub BLOB_BIND_TYPE { SQL_BLOB } +sub get_keys_like_operator { + my $self = shift; + my $bool = MogileFS::Config->server_setting_cached('case_sensitive_list_keys'); + + # this is a dbh-wide change, but this is the only place we use LIKE + $self->dbh->do("PRAGMA case_sensitive_like = " . ($bool ? "ON" : "OFF")); + return "LIKE"; +} + 1; __END__ diff --git a/t/store.t b/t/store.t index 96310185..cf9ae358 100644 --- a/t/store.t +++ b/t/store.t @@ -214,4 +214,25 @@ if (ref($sto) eq "MogileFS::Store::Postgres") { ok($rows->[0] eq 'Case_Sensitive_Clod', "list_keys matches insensitively"); } +# make list_keys case-sensitive +MogileFS::Config->set_server_setting("case_sensitive_list_keys", 1); +MogileFS::Config->cache_server_setting("case_sensitive_list_keys", 1); + +$rows = $sto->get_keys_like($dmid, "case", undef, 1000); +ok(scalar @$rows == 0, "case-incorrect list_keys fails to match"); +$rows = $sto->get_keys_like($dmid, "Case", undef, 1000); +ok($rows->[0] eq 'Case_Sensitive_Clod', "case-correct list_keys matches"); +ok(scalar @$rows == 1, "only one row matched"); + +# make list_keys case-insensitive again +MogileFS::Config->set_server_setting("case_sensitive_list_keys", 0); +MogileFS::Config->cache_server_setting("case_sensitive_list_keys", 0); + +$rows = $sto->get_keys_like($dmid, "case", undef, 1000); +if (ref($sto) eq "MogileFS::Store::Postgres") { + ok(scalar @$rows == 0, "Postgres list_keys is case-sensitive"); +} else { + ok($rows->[0] eq 'Case_Sensitive_Clod', "list_keys matches insensitively (again)"); +} + done_testing(); From f2aca49c4a6d798ad167b98ec5133ea0f6136e3e Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 21 Jun 2012 09:20:54 +0000 Subject: [PATCH 261/405] t/02-host-device: unit 
tests for device/host state checks Device state reporting functions should respect whatever the underlying Host state is. --- t/02-host-device.t | 50 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/t/02-host-device.t b/t/02-host-device.t index 718c2533..8e29c4c8 100644 --- a/t/02-host-device.t +++ b/t/02-host-device.t @@ -53,6 +53,56 @@ observed_state => 'writeable'}); ok($dev->can_read_from, 'can_read_from works'); ok($dev->should_get_new_files, 'should_get_new_files works'); + # monitor needs to respect can_read_from, + # everything else respects should_read_from + { + foreach my $s (qw/down dead/) { + $host->{status} = $s; + ok(!$host->alive, "host is not alive when $s"); + ok(!$dev->can_read_from, "can_read_from for device fails when host is $s"); + ok(!$dev->should_read_from, "device should not be readable when host is $s"); + } + $host->{status} = "alive"; + ok($dev->can_read_from, "device is readable from again"); + } + + # first ensure device status is respected + { + foreach my $s (qw/down dead/) { + $dev->{status} = $s; + ok(!$dev->should_read_from, "device is NOT readable when $s"); + } + foreach my $s (qw/readonly drain alive/) { + $dev->{status} = $s; + ok($dev->should_read_from, "device readable when $s"); + } + } + + # take host observed states into account for should_read_from + { + $host->{observed_state} = "unreachable"; + ok($dev->can_read_from, "device can be read from by monitor of unreachable"); + ok(! $dev->should_read_from, "device should not be read from by non-monitor workers"); + ok(! $dev->observed_readable, "device not readable"); + ok(! $dev->observed_writeable, "device not writeable"); + ok($dev->observed_unreachable, "device is unreachable"); + + $host->{observed_state} = "reachable"; + ok($dev->should_read_from, "device is readable again by non-monitor workers"); + ok($dev->observed_writeable, "device writable again"); + ok(! 
$dev->observed_unreachable, "device is reachable again"); + } + + # take device observed states into account for should_read_from + { + $dev->{observed_state} = "unreachable"; + ok(!$dev->should_read_from, "device should not be read from when observed unreachable"); + foreach my $s (qw/readable writeable/) { + $dev->{observed_state} = $s; + ok($dev->should_read_from, "device should be read from when observed $s"); + } + } + $hostfac->remove($host); $devfac->remove($dev); } From 2ec479877699443baf2377ffa63f209c2740f863 Mon Sep 17 00:00:00 2001 From: Pyry Hakulinen Date: Fri, 4 May 2012 15:28:45 +0300 Subject: [PATCH 262/405] Delete memcache data when we replicate fids Because memcache TTL is now user configurable, data in memcached might be valid for a long time, and as such invalid paths might be returned. It would be possible to populate memcache, instead of just removing. But it might be wasteful when a device is marked as dead, those replicated fids might not need to be in memcached. There is still one TODO left. If someone modifies mindevcount and runs FSCK, then the mappings might become incorrect, but I reasoned that it would be rather rare. 
--- lib/MogileFS/DevFID.pm | 3 +++ lib/MogileFS/Worker/Query.pm | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/DevFID.pm b/lib/MogileFS/DevFID.pm index aa0c6cce..92a798cf 100644 --- a/lib/MogileFS/DevFID.pm +++ b/lib/MogileFS/DevFID.pm @@ -140,6 +140,9 @@ sub add_to_db { my $sto = Mgd::get_store(); if ($sto->add_fidid_to_devid($self->{fidid}, $self->{devid})) { + if (my $memc = MogileFS::Config->memcache_client) { + $memc->delete("mogdevids:$self->{fidid}"); + } return $self->fid->update_devcount(no_lock => $no_lock); } else { # was already on that device diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 4fb3638d..4c5f252f 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -1062,7 +1062,7 @@ sub cmd_get_paths { # memcache mappings are as follows: # mogfid:: -> fidid - # mogdevids: -> \@devids (and TODO: invalidate when the replication or deletion is run!) + # mogdevids: -> \@devids (and TODO: invalidate when deletion is run!) # if you specify 'noverify', that means a correct answer isn't needed and memcache can # be used. From 23322a1ef17315f7ebbcb5e053d46a0c136af026 Mon Sep 17 00:00:00 2001 From: dormando Date: Thu, 21 Jun 2012 17:04:30 -0700 Subject: [PATCH 263/405] Checking in changes prior to tagging of version 2.64. Changelog diff is: diff --git a/CHANGES b/CHANGES index 07790f3..c552089 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,15 @@ +2012-06-21: Release version 2.64 + + * Delete memcache data when we replicate fids (Pyry Hakulinen ) + + * implement "case_sensitive_list_keys" server setting (Eric Wong ) + + * get_paths: deprioritize devs in "drain" state (Eric Wong ) + + * make marking a host down cause devices to act as down (Eric Wong ) + + * monitor skips hosts marked dead or down (Eric Wong ) + 2012-05-29: Release version 2.63 * Critical bugfix for Postgres users introduced by 2.61. 
New file uploads --- CHANGES | 12 ++++++++++++ lib/MogileFS/Server.pm | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 07790f3e..c5520898 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,15 @@ +2012-06-21: Release version 2.64 + + * Delete memcache data when we replicate fids (Pyry Hakulinen ) + + * implement "case_sensitive_list_keys" server setting (Eric Wong ) + + * get_paths: deprioritize devs in "drain" state (Eric Wong ) + + * make marking a host down cause devices to act as down (Eric Wong ) + + * monitor skips hosts marked dead or down (Eric Wong ) + 2012-05-29: Release version 2.63 * Critical bugfix for Postgres users introduced by 2.61. New file uploads diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 1c5240d7..8ad2dcfb 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.63"; +$VERSION = "2.64"; =head1 NAME From 67503116536bafc8c0450354a437c7be47b88b32 Mon Sep 17 00:00:00 2001 From: Dave Lambley Date: Wed, 18 Jul 2012 12:56:15 +0000 Subject: [PATCH 264/405] When a mogstored is down, die with a more informative message. 
--- lib/MogileFS/Connection/Mogstored.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Connection/Mogstored.pm b/lib/MogileFS/Connection/Mogstored.pm index c4ca5fd8..c071389b 100644 --- a/lib/MogileFS/Connection/Mogstored.pm +++ b/lib/MogileFS/Connection/Mogstored.pm @@ -18,7 +18,7 @@ sub sock { return $self->{sock} if $self->{sock}; $self->{sock} = IO::Socket::INET->new(PeerAddr => $self->{ip}, PeerPort => $self->{port}, - Timeout => $timeout); + Timeout => $timeout) or die "Could not connect to mogstored on ".$self->{ip}.":".$self->{port}; $self->{sock}->sockopt(SO_KEEPALIVE, 1); return $self->{sock}; } From 159cb256593165cf5ba50f476c63859b87dea1d1 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 21 Jun 2012 12:31:26 -0700 Subject: [PATCH 265/405] iostat: allow MOG_IOSTAT_CMD env override This makes it easier to test mock or alternative iostat implementations. This can be used for emulating the iostat output on other platforms. --- lib/Mogstored/ChildProcess/IOStat.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Mogstored/ChildProcess/IOStat.pm b/lib/Mogstored/ChildProcess/IOStat.pm index d2d2231c..9e64f535 100644 --- a/lib/Mogstored/ChildProcess/IOStat.pm +++ b/lib/Mogstored/ChildProcess/IOStat.pm @@ -4,7 +4,7 @@ use base 'Mogstored::ChildProcess'; my $docroot; -my $iostat_cmd = "iostat -dx 1 30"; +my $iostat_cmd = $ENV{MOG_IOSTAT_CMD} || "iostat -dx 1 30"; if ($^O =~ /darwin/) { $iostat_cmd =~ s/x// } sub pre_exec_init { From 6f0a20a8644329132b0c14aa8200dacae6c1df17 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 21 Jun 2012 12:57:49 -0700 Subject: [PATCH 266/405] iostat: increase flexibility of iostat parser The parser now looks for contiguous lines of statistics and (if it's previously captured stats) emits whenever the first non-stats line appears. Relying on the "Device:" line is not portable to FreeBSD (and possibly other iostats implementations). 
The parser also ignores leading/trailing whitespace on each statistics line. Tested on Linux (sysstat 10.0.5) and FreeBSD 9. For testing iostat output on FreeBSD, I used MOG_IOSTAT_CMD like this on my GNU/Linux system: MOG_IOSTAT_CMD="ssh fbsd9vm iostat -dx 1 30" mogstored ... ref: http://code.google.com/p/mogilefs/issues/detail?id=9 --- lib/Mogstored/ChildProcess/IOStat.pm | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/lib/Mogstored/ChildProcess/IOStat.pm b/lib/Mogstored/ChildProcess/IOStat.pm index 9e64f535..4ce5d8c8 100644 --- a/lib/Mogstored/ChildProcess/IOStat.pm +++ b/lib/Mogstored/ChildProcess/IOStat.pm @@ -66,26 +66,21 @@ sub run { my $mog_sysid = mog_sysid_map(); # 5 (mogdevid) -> 2340 (os devid) my $dev_sysid = {}; # hashref, populated lazily: { /dev/sdg => system dev_t } my %devt_util; # dev_t => 52.55 - my $init = 0; + my $stats = 0; while (<$iofh>) { - if (m/^Device:/) { - %devt_util = (); - $init = 1; - next; - } - next unless $init; - if (m/^ (\S+) .*? ([\d.]+) \n/x) { + if (m/^\s*(\S+)\s.*?([\d.]+)\s*$/) { my ($devnode, $util) = ("/dev/$1", $2); unless (exists $dev_sysid->{$devnode}) { $dev_sysid->{$devnode} = (stat($devnode))[6]; # rdev } my $devt = $dev_sysid->{$devnode}; $devt_util{$devt} = $util; - next; - } - # blank line is the end. - if (m!^\s*\n!) 
{ - $init = 0; + $stats++; + } elsif ($stats) { + # blank line is the end, or any other line we don't understand + # if we have stats, we print them, otherwise do nothing + $stats = 0; + my $ret = ""; foreach my $mogdevid (sort { $a <=> $b } keys %$mog_sysid) { my $devt = $mog_sysid->{$mogdevid}; @@ -96,7 +91,7 @@ sub run { print $ret; $check_for_parent->(); - next; + %devt_util = (); } } } From b70fef95097d9d9f92c30985507197d0297f1767 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 9 Jul 2012 18:46:58 -0700 Subject: [PATCH 267/405] remove old rebalance knobs from server settings This hasn't been used since the old rebalance code was nuked for 2.40 (commit 0be2f9771a213e92d807f29bead13e6e4203ad54) --- lib/MogileFS/Config.pm | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/lib/MogileFS/Config.pm b/lib/MogileFS/Config.pm index b3d02b16..513dfea3 100644 --- a/lib/MogileFS/Config.pm +++ b/lib/MogileFS/Config.pm @@ -365,7 +365,6 @@ sub server_setting_is_writable { # let slave settings go through unmodified, for now. if ($key =~ /^slave_/) { return $del_if_blank }; - if ($key eq "enable_rebalance") { return $bool }; if ($key eq "skip_devcount") { return $bool }; if ($key eq "skip_mkcol") { return $bool }; if ($key eq "case_sensitive_list_keys") { return $bool }; @@ -377,15 +376,6 @@ sub server_setting_is_writable { if ($key eq 'network_zones') { return $any }; if ($key =~ /^zone_/) { return $valid_netmask_list }; - if ($key eq "rebalance_policy") { return sub { - my $v = shift; - return undef unless $v; - # TODO: actually load the provided class and test if it loads? 
- die "Doesn't match acceptable format" unless - $v =~ /^[\w:\-]+$/; - return $v; - }} - # should probably restrict to (\d+) if ($key =~ /^queue_/) { return $any }; From 6e47a569d9d3de155d00e1aa92ed01df2b1ee36a Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 7 Aug 2012 00:30:21 +0000 Subject: [PATCH 268/405] fix tests when /etc/mogilefs/mogstored.conf exists mogstored gains a --skipconfig switch which we use in tests to ignore the default config file. mogilefsd has had this switch (with identical semantics) since 2004. --- lib/MogileFS/Test.pm | 1 + mogstored | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Test.pm b/lib/MogileFS/Test.pm index fb461170..70194272 100644 --- a/lib/MogileFS/Test.pm +++ b/lib/MogileFS/Test.pm @@ -123,6 +123,7 @@ sub create_mogstored { die "Failed: tracker already running on port 7500?\n" if $conn; $ENV{PERL5LIB} .= ":$Bin/../lib"; my @args = ("$Bin/../mogstored", + "--skipconfig", "--httplisten=$ip:7500", "--mgmtlisten=$ip:7501", "--maxconns=1000", # because we're not root, put it below 1024 diff --git a/mogstored b/mogstored index d11fc218..11e3c44f 100755 --- a/mogstored +++ b/mogstored @@ -37,6 +37,7 @@ my $iostat_available = 1; # bool: iostat working. assume working to start. my ($iostat_pipe_r, $iostat_pipe_w); # pipes for talking to iostat process # Config: +my $opt_skipconfig; my $opt_daemonize; my $opt_config; my $opt_iostat = 1; # default to on now @@ -54,6 +55,7 @@ $0 = "mogstored"; my %config_opts = ( 'iostat' => \$opt_iostat, + 's|skipconfig' => \$opt_skipconfig, 'daemonize|d' => \$opt_daemonize, 'config=s' => \$opt_config, 'httplisten=s' => \$http_listen, @@ -70,7 +72,7 @@ die "Unknown server type. Valid options: --server={perlbal,lighttpd,apache,none unless $server =~ /^perlbal|lighttpd|apache|none$/; $opt_config = $default_config if ! 
$opt_config && -e $default_config; -load_config_file($opt_config => \%config_opts) if $opt_config; +load_config_file($opt_config => \%config_opts) if $opt_config && !$opt_skipconfig; # initialize basic required Perlbal machinery, for any HTTP server my $perlbal_init = qq{ From 8c4554ca5d5d7367f00890eb05c765a4da3d237e Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 18 Jul 2012 02:34:31 +0000 Subject: [PATCH 269/405] tests: add basic test for reaper Reaper isn't tested anywhere else. We plan on changing it slightly so ensure we don't introduce regressions. --- MANIFEST | 1 + t/70-reaper.t | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 t/70-reaper.t diff --git a/MANIFEST b/MANIFEST index 5dfff070..eb4cb987 100644 --- a/MANIFEST +++ b/MANIFEST @@ -76,6 +76,7 @@ t/30-rebalance.t t/40-httpfile.t t/50-checksum.t t/60-fsck.t +t/70-reaper.t t/checksum.t t/fid-stat.t t/mogstored-shutdown.t diff --git a/t/70-reaper.t b/t/70-reaper.t new file mode 100644 index 00000000..bb79b76c --- /dev/null +++ b/t/70-reaper.t @@ -0,0 +1,94 @@ +# -*-perl-*- +use strict; +use warnings; +use Test::More; +use FindBin qw($Bin); +use Time::HiRes qw(sleep); +use MogileFS::Server; +use MogileFS::Test; +find_mogclient_or_skip(); +use MogileFS::Admin; +use File::Temp; + +my $sto = eval { temp_store(); }; +if (!$sto) { + plan skip_all => "Can't create temporary test database: $@"; + exit 0; +} + +my %mogroot; +$mogroot{1} = File::Temp::tempdir( CLEANUP => 1 ); +my $dev2host = { 1 => 1, 2 => 1, 3 => 1 }; +foreach (sort { $a <=> $b } keys %$dev2host) { + my $root = $mogroot{$dev2host->{$_}}; + mkdir("$root/dev$_") or die "Failed to create dev$_ dir: $!"; +} + +my $ms1 = create_mogstored("127.0.1.1", $mogroot{1}); +ok($ms1, "got mogstored"); + +while (! -e "$mogroot{1}/dev1/usage" || + ! -e "$mogroot{1}/dev2/usage" || + ! 
-e "$mogroot{1}/dev3/usage") { + print "Waiting on usage...\n"; + sleep(.25); +} + +my $tmptrack = create_temp_tracker($sto); +ok($tmptrack); + +my $admin = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001'); +$admin or die "failed to create admin socket: $!"; +my $moga = MogileFS::Admin->new(hosts => [ "127.0.0.1:7001" ]); +my $mogc = MogileFS::Client->new( + domain => "testdom", + hosts => [ "127.0.0.1:7001" ], + ); + +ok($tmptrack->mogadm("host", "add", "hostA", "--ip=127.0.1.1", "--status=alive"), "created hostA"); +ok($tmptrack->mogadm("device", "add", "hostA", 1), "created dev1 on hostA"); +ok($tmptrack->mogadm("device", "add", "hostA", 2), "created dev2 on hostA"); + +ok($tmptrack->mogadm("domain", "add", "testdom"), "created test domain"); +ok($tmptrack->mogadm("class", "add", "testdom", "2copies", "--mindevcount=2"), "created 2copies class in testdom"); + +# create one sample file with 2 copies +my $fh = $mogc->new_file("file1", "2copies"); +ok($fh, "got filehandle"); +ok(close($fh), "closed file"); + +my $tries; +my @urls; + +# wait for it to replicate +for ($tries = 100; $tries--; ) { + @urls = $mogc->get_paths("file1"); + last if scalar(@urls) == 2; + sleep .1; +} + +is(scalar(@urls), 2, "replicated to 2 paths"); +my $orig_urls = join("\n", sort(@urls)); + +# add a new device and mark an existing device as dead +ok($tmptrack->mogadm("device", "add", "hostA", 3), "created dev3 on hostA"); +ok($tmptrack->mogadm("device", "mark", "hostA", 2, "dead"), "mark dev2 as dead"); + +# reaper should notice the dead device in 5-10s +for ($tries = 100; $tries--; ) { + @urls = $mogc->get_paths("file1"); + last if scalar(grep(m{/dev2/}, @urls)) == 0; + sleep 0.1; +} +is(scalar(grep(m{/dev2/}, @urls)), 0, "file1 no longer references dead dev2"); + +# replicator should replicate the file within 15-30s +for ($tries = 300; $tries--; ) { + @urls = sort($mogc->get_paths("file1")); + last if (scalar(@urls) == 2) && (join("\n", @urls) ne $orig_urls); + sleep 0.1; +} 
+is(grep(m{/dev3/}, @urls), 1, "file1 got copied to dev3"); +is(scalar(@urls), 2, "we have 2 paths for file1 again"); + +done_testing(); From 15eca7a9f50785e7965e2b94fd1e5b0b02dc438c Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 18 Jul 2012 02:34:32 +0000 Subject: [PATCH 270/405] reaper: factor out reap_fid sub from the work loop A subroutine with a 20-line comment deserves to be its own sub. This will make it easier to see what the future reaper lock will guard without needing to scroll on small terminals. --- lib/MogileFS/Worker/Reaper.pm | 53 +++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/lib/MogileFS/Worker/Reaper.pm b/lib/MogileFS/Worker/Reaper.pm index 22e52b6a..9b3c4341 100644 --- a/lib/MogileFS/Worker/Reaper.pm +++ b/lib/MogileFS/Worker/Reaper.pm @@ -21,6 +21,34 @@ sub watchdog_timeout { my %all_empty; # devid -> bool, if all empty of files in file_on +# order is important here: +# +# first, add fid to file_to_replicate table. it +# shouldn't matter if the replicator gets to this +# before the subsequent 'forget_about' method, as the +# replicator will treat dead file_on devices as +# non-existent anyway. however, it is important that +# we enqueue it for replication first, before we +# forget about that file_on row, otherwise a failure +# after/during 'forget_about' could leave a stranded +# file on a dead device and we'd never fix it. 
+# +# and just for extra safety, in case replication happened +# on another machine after 'enqueue_for_replication' but +# before 'forget_about', and that other machine hadn't yet +# re-read the device table to learn that this device +# was dead, we delay the replication for the amount of time +# that the device summary table is valid for (presumably +# the other trackers are running identical software, or +# at least have the same timeout value) +sub reap_fid { + my ($self, $fid, $dev) = @_; + + $fid->enqueue_for_replication(in => DEVICE_SUMMARY_CACHE_TIMEOUT + 1); + $dev->forget_about($fid); + $fid->update_devcount; +} + sub work { my $self = shift; @@ -42,30 +70,7 @@ sub work { $self->still_alive; foreach my $fid (@fids) { - # order is important here: - - # first, add fid to file_to_replicate table. it - # shouldn't matter if the replicator gets to this - # before the subsequent 'forget_about' method, as the - # replicator will treat dead file_on devices as - # non-existent anyway. however, it is important that - # we enqueue it for replication first, before we - # forget about that file_on row, otherwise a failure - # after/during 'forget_about' could leave a stranded - # file on a dead device and we'd never fix it. 
- # - # and just for extra safety, in case replication happened - # on another machine after 'enqueue_for_replication' but - # before 'forget_about', and that other machine hadn't yet - # re-read the device table to learn that this device - # was dead, we delay the replication for the amount of time - # that the device summary table is valid for (presumably - # the other trackers are running identical software, or - # at least have the same timeout value) - - $fid->enqueue_for_replication(in => DEVICE_SUMMARY_CACHE_TIMEOUT + 1); - $dev->forget_about($fid); - $fid->update_devcount; + $self->reap_fid($fid, $dev); } } }); From 57a5099b345268f6b1b6da658ece6e67eba73c12 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 18 Jul 2012 02:34:33 +0000 Subject: [PATCH 271/405] reaper: global lock around DB interaction We don't want multiple reaper process stepping on each other during UPDATE/INSERT, causing needless conflicts/failures at the DB level for every single FID. JobMaster already locks its queues in a similar way to prevent conflicts, so this should not noticeably harm performance (and may improve performance due to the DB conflict reduction). 
--- lib/MogileFS/Worker/Reaper.pm | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/lib/MogileFS/Worker/Reaper.pm b/lib/MogileFS/Worker/Reaper.pm index 9b3c4341..0483d32a 100644 --- a/lib/MogileFS/Worker/Reaper.pm +++ b/lib/MogileFS/Worker/Reaper.pm @@ -62,15 +62,23 @@ sub work { my $devid = $dev->id; next if $all_empty{$devid}; - my @fids = $dev->fid_list(limit => 1000); - unless (@fids) { - $all_empty{$devid} = 1; - next; - } - $self->still_alive; + my $sto = Mgd::get_store(); + my $lock = "mgfs:reaper"; + my $lock_timeout = $self->watchdog_timeout / 4; - foreach my $fid (@fids) { - $self->reap_fid($fid, $dev); + if ($sto->get_lock($lock, $lock_timeout)) { + my @fids = $dev->fid_list(limit => 1000); + if (@fids) { + $self->still_alive; + foreach my $fid (@fids) { + $self->reap_fid($fid, $dev); + } + } else { + $all_empty{$devid} = 1; + } + $sto->release_lock($lock); + } else { + debug("get_lock($lock, $lock_timeout) failed"); } } }); From 8cc75d8414ea30839ba6d796610687133e3fcfbb Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 18 Jul 2012 02:34:34 +0000 Subject: [PATCH 272/405] reaper: add "queue_rate_for_reaper" server setting This controls the number of FIDs the reaper can inject into the replication queue for each dead device, per wakeup. This defaults to 1000, the same value it's had since (at least) 2006. 
--- lib/MogileFS/Worker/Reaper.pm | 3 ++- t/70-reaper.t | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Reaper.pm b/lib/MogileFS/Worker/Reaper.pm index 0483d32a..0ca9e9de 100644 --- a/lib/MogileFS/Worker/Reaper.pm +++ b/lib/MogileFS/Worker/Reaper.pm @@ -65,9 +65,10 @@ sub work { my $sto = Mgd::get_store(); my $lock = "mgfs:reaper"; my $lock_timeout = $self->watchdog_timeout / 4; + my $limit = MogileFS::Config->server_setting_cached('queue_rate_for_reaper') || 1000; if ($sto->get_lock($lock, $lock_timeout)) { - my @fids = $dev->fid_list(limit => 1000); + my @fids = $dev->fid_list(limit => $limit); if (@fids) { $self->still_alive; foreach my $fid (@fids) { diff --git a/t/70-reaper.t b/t/70-reaper.t index bb79b76c..0fdba930 100644 --- a/t/70-reaper.t +++ b/t/70-reaper.t @@ -51,6 +51,7 @@ ok($tmptrack->mogadm("device", "add", "hostA", 2), "created dev2 on hostA"); ok($tmptrack->mogadm("domain", "add", "testdom"), "created test domain"); ok($tmptrack->mogadm("class", "add", "testdom", "2copies", "--mindevcount=2"), "created 2copies class in testdom"); +ok($tmptrack->mogadm("settings", "set", "queue_rate_for_reaper", 123), "set queue_rate_for_reaper"); # create one sample file with 2 copies my $fh = $mogc->new_file("file1", "2copies"); From e51ee46378324a28bf24b358c1e3a4d81326e624 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 18 Jul 2012 02:34:35 +0000 Subject: [PATCH 273/405] move ENDOFTIME constant from replicate to store This will make it easier to reuse this constant in other workers that can check the queues (e.g. job_master/reaper). 
--- lib/MogileFS/Store.pm | 6 ++++++ lib/MogileFS/Worker/Replicate.pm | 8 +------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 51a1a0f1..0827c31b 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -2160,6 +2160,12 @@ sub delete_checksum { $self->dbh->do("DELETE FROM checksum WHERE fid = ?", undef, $fidid); } +# setup the value used in a 'nexttry' field to indicate that this item will +# never actually be tried again and require some sort of manual intervention. +use constant ENDOFTIME => 2147483647; + +sub end_of_time { ENDOFTIME; } + 1; __END__ diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index 3641710a..247e07a5 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -15,12 +15,6 @@ use MogileFS::ReplicationRequest qw(rr_upgrade); use Digest; use MIME::Base64 qw(encode_base64); -# setup the value used in a 'nexttry' field to indicate that this item will never -# actually be tried again and require some sort of manual intervention. -use constant ENDOFTIME => 2147483647; - -sub end_of_time { ENDOFTIME; } - sub new { my ($class, $psock) = @_; my $self = fields::new($class); @@ -140,7 +134,7 @@ sub replicate_using_torepl_table { # special; update to a time that won't happen again, # as we've encountered a scenario in which case we're # really hosed - $sto->reschedule_file_to_replicate_absolute($fid, ENDOFTIME); + $sto->reschedule_file_to_replicate_absolute($fid, $sto->end_of_time); } elsif ($type eq "offset") { $sto->reschedule_file_to_replicate_relative($fid, $delay+0); } else { From 0715d95f850de62a479ba2b69814bcadf016a0dc Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 18 Jul 2012 02:34:36 +0000 Subject: [PATCH 274/405] reaper: add queue_size_for_reaper server setting Users may now configure the queue_size_for_reaper server setting to limit the size of the non-urgent replication queue. 
The urgent replication queue (nexttry == 0) is unaffected, as are other processes (fsck) which may inject into the replication queue. The default remains unlimited, the reaper will queue as fast as it possibly can: 1000 FIDs every 5 seconds (per process) --- lib/MogileFS/Store.pm | 9 +++++++++ lib/MogileFS/Worker/Reaper.pm | 29 +++++++++++++++++++++++++++-- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 0827c31b..1ac1537c 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -2166,6 +2166,15 @@ use constant ENDOFTIME => 2147483647; sub end_of_time { ENDOFTIME; } +# returns the size of the non-urgent replication queue +# nexttry == 0 - the file is urgent +# nexttry != 0 && nexttry < ENDOFTIME - the file is deferred +sub deferred_repl_queue_length { + my ($self) = @_; + + return $self->dbh->selectrow_array('SELECT COUNT(*) FROM file_to_replicate WHERE nexttry != 0 AND nexttry < ?', undef, $self->end_of_time); +} + 1; __END__ diff --git a/lib/MogileFS/Worker/Reaper.pm b/lib/MogileFS/Worker/Reaper.pm index 0ca9e9de..4d928439 100644 --- a/lib/MogileFS/Worker/Reaper.pm +++ b/lib/MogileFS/Worker/Reaper.pm @@ -49,6 +49,32 @@ sub reap_fid { $fid->update_devcount; } +# this returns 1000 by default +sub reaper_inject_limit { + my ($self) = @_; + + my $sto = Mgd::get_store(); + my $max = MogileFS::Config->server_setting_cached('queue_size_for_reaper'); + my $limit = MogileFS::Config->server_setting_cached('queue_rate_for_reaper') || 1000; + + # max defaults to zero, meaning we inject $limit every wakeup + if ($max) { + # if a queue size limit is configured for reaper, prevent too many + # files from entering the repl queue: + my $len = $sto->deferred_repl_queue_length; + my $space_left = $max - $len; + + $limit = $space_left if ($limit > $space_left); + + # limit may end up being negative here since other processes + # can inject into the deferred replication queue, reaper is + # the only one which 
can respect this queue size + $limit = 0 if $limit < 0; + } + + return $limit; +} + sub work { my $self = shift; @@ -61,12 +87,11 @@ sub work { { my $devid = $dev->id; next if $all_empty{$devid}; + my $limit = $self->reaper_inject_limit or next; my $sto = Mgd::get_store(); my $lock = "mgfs:reaper"; my $lock_timeout = $self->watchdog_timeout / 4; - my $limit = MogileFS::Config->server_setting_cached('queue_rate_for_reaper') || 1000; - if ($sto->get_lock($lock, $lock_timeout)) { my @fids = $dev->fid_list(limit => $limit); if (@fids) { From 6218dc76eb8968e8251f507a4f916bdf97d29c7e Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 19 Jul 2012 07:24:38 +0000 Subject: [PATCH 275/405] reaper: switch to Danga::Socket for scheduling Reaper now schedules the first batch of files on a newly-dead device for replication after delaying (on the reaper itself) for DEVICE_SUMMARY_CACHE_TIMEOUT+1 (16) seconds. This allows all subsequent files to replicate sooner, without the 16s delay. Since Danga::Socket is used to schedule this 16s delay, reaping of other dead devices won't be impacted by this delay. The DEVICE_SUMMARY_CACHE_TIMEOUT+1 delay still remains to offer a small level of protection against replicators with out-of-date internal device caches and writable-but-"dead" devices. As an additional countermeasure against out-of-date device caches, reapers will now slowly back off of a device over the course of 4 hours after it fails to find new, unreaped FIDs. Previously, any files that got replicated to an already "dead" device would remain there until a reaper restart. 
--- lib/MogileFS/Worker/Reaper.pm | 134 +++++++++++++++++++++++++--------- 1 file changed, 98 insertions(+), 36 deletions(-) diff --git a/lib/MogileFS/Worker/Reaper.pm b/lib/MogileFS/Worker/Reaper.pm index 4d928439..26012067 100644 --- a/lib/MogileFS/Worker/Reaper.pm +++ b/lib/MogileFS/Worker/Reaper.pm @@ -4,8 +4,13 @@ package MogileFS::Worker::Reaper; use strict; use base 'MogileFS::Worker'; use MogileFS::Server; -use MogileFS::Util qw(every error debug); +use MogileFS::Util qw(error debug); use MogileFS::Config qw(DEVICE_SUMMARY_CACHE_TIMEOUT); +use constant REAP_INTERVAL => 5; +use constant REAP_BACKOFF_MIN => 60; + +# completely forget about devices we've reaped after 2 hours of idleness +use constant REAP_BACKOFF_MAX => 7200; sub new { my ($class, $psock) = @_; @@ -19,8 +24,6 @@ sub watchdog_timeout { return 240; } -my %all_empty; # devid -> bool, if all empty of files in file_on - # order is important here: # # first, add fid to file_to_replicate table. it @@ -32,19 +35,10 @@ my %all_empty; # devid -> bool, if all empty of files in file_on # forget about that file_on row, otherwise a failure # after/during 'forget_about' could leave a stranded # file on a dead device and we'd never fix it. 
-# -# and just for extra safety, in case replication happened -# on another machine after 'enqueue_for_replication' but -# before 'forget_about', and that other machine hadn't yet -# re-read the device table to learn that this device -# was dead, we delay the replication for the amount of time -# that the device summary table is valid for (presumably -# the other trackers are running identical software, or -# at least have the same timeout value) sub reap_fid { my ($self, $fid, $dev) = @_; - $fid->enqueue_for_replication(in => DEVICE_SUMMARY_CACHE_TIMEOUT + 1); + $fid->enqueue_for_replication(in => 1); $dev->forget_about($fid); $fid->update_devcount; } @@ -75,39 +69,107 @@ sub reaper_inject_limit { return $limit; } +# we pass the $devid here (instead of a Device object) to avoid +# potential memory leaks since this sub reschedules itself to run +# forever. $delay is the current delay we were scheduled at +sub reap_dev { + my ($self, $devid, $delay) = @_; + my $limit = $self->reaper_inject_limit; + + # just in case a user mistakenly nuked a devid from the device table: + my $dev = Mgd::device_factory()->get_by_id($devid); + unless ($dev) { + error("No device row for dev$devid, cannot reap"); + $delay = undef; + } + + # limit == 0 if we hit the queue size limit, we'll just reschedule + if ($limit && $dev) { + my $sto = Mgd::get_store(); + my $lock = "mgfs:reaper"; + my $lock_timeout = $self->watchdog_timeout / 4; + my @fids; + + if ($sto->get_lock($lock, $lock_timeout)) { + @fids = $dev->fid_list(limit => $limit); + if (@fids) { + $self->still_alive; + foreach my $fid (@fids) { + $self->reap_fid($fid, $dev); + } + } + $sto->release_lock($lock); + } else { + debug("get_lock($lock, $lock_timeout) failed"); + } + + # if we've found any FIDs (perhaps even while backing off) + # ensure we try to find more soon: + if (@fids) { + $delay = REAP_INTERVAL; + } else { + $delay = $self->reap_dev_backoff_delay($delay); + } + } + + return unless defined $delay; + + # schedule 
another update, delay could be REAP_BACKOFF_MAX + Danga::Socket->AddTimer($delay, sub { $self->reap_dev($devid, $delay) }); +} + +# called when we're hopefully all done with a device, but reschedule +# into the future in case the replicator had an out-of-date cache and the +# "dead" device was actually writable. +sub reap_dev_backoff_delay { + my ($self, $delay) = @_; + + return REAP_BACKOFF_MIN if ($delay < REAP_BACKOFF_MIN); + + $delay *= 2; + return $delay > REAP_BACKOFF_MAX ? undef : $delay; +} + +# looks for dead devices sub work { my $self = shift; - every(5, sub { + # we just forked from our parent process, also using Danga::Socket, + # so we need to lose all that state and start afresh. + Danga::Socket->Reset; + + # ensure we get monitor updates + Danga::Socket->AddOtherFds($self->psock_fd, sub{ $self->read_from_parent }); + + my %devid_seen; + my $reap_check; + $reap_check = sub { # get db and note we're starting a run debug("Reaper running; looking for dead devices"); + $self->still_alive; foreach my $dev (grep { $_->dstate->is_perm_dead } Mgd::device_factory()->get_all) { - my $devid = $dev->id; - next if $all_empty{$devid}; - my $limit = $self->reaper_inject_limit or next; - - my $sto = Mgd::get_store(); - my $lock = "mgfs:reaper"; - my $lock_timeout = $self->watchdog_timeout / 4; - if ($sto->get_lock($lock, $lock_timeout)) { - my @fids = $dev->fid_list(limit => $limit); - if (@fids) { - $self->still_alive; - foreach my $fid (@fids) { - $self->reap_fid($fid, $dev); - } - } else { - $all_empty{$devid} = 1; - } - $sto->release_lock($lock); - } else { - debug("get_lock($lock, $lock_timeout) failed"); - } + next if $devid_seen{$dev->id}; + + # delay the initial device reap in case any replicator cache + # thinks the device is still alive + Danga::Socket->AddTimer(DEVICE_SUMMARY_CACHE_TIMEOUT + 1, sub { + $self->reap_dev($dev->id, REAP_INTERVAL); + }); + + # once we've seen a device, reap_dev will takeover scheduling + # reaping for the given device. 
+ $devid_seen{$dev->id} = 1; } - }); + + Danga::Socket->AddTimer(REAP_INTERVAL, $reap_check); + }; + + # kick off the reaper and loop forever + $reap_check->(); + Danga::Socket->EventLoop; } 1; From f015219863e8e4db8e62f4a484827b16387c6776 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 19 Jul 2012 07:57:22 +0000 Subject: [PATCH 276/405] reaper: remove update_devcount call Since reaper now schedule replications with the same priority as fsck, we will also rely on the replicator to call update_devcount for us, allowing us to avoid making an unnecessary write to the database. --- lib/MogileFS/Worker/Reaper.pm | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/MogileFS/Worker/Reaper.pm b/lib/MogileFS/Worker/Reaper.pm index 26012067..ac23320e 100644 --- a/lib/MogileFS/Worker/Reaper.pm +++ b/lib/MogileFS/Worker/Reaper.pm @@ -40,7 +40,6 @@ sub reap_fid { $fid->enqueue_for_replication(in => 1); $dev->forget_about($fid); - $fid->update_devcount; } # this returns 1000 by default From 84dee818f38fb66fdc6c678230ba27f3263773e8 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 4 Aug 2012 01:37:11 +0000 Subject: [PATCH 277/405] reaper: better handling of lock failures The delay backoff should only occur if we got a successful lock. Backing off the delay on lock failure can result in the delay becoming undef and (incorrectly) making the reaper give up on a device. Fortunately, lock failures with the extremely long (60s) lock timeout is unlikely to be a problem in practice. 
--- lib/MogileFS/Worker/Reaper.pm | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/lib/MogileFS/Worker/Reaper.pm b/lib/MogileFS/Worker/Reaper.pm index ac23320e..3f0866d1 100644 --- a/lib/MogileFS/Worker/Reaper.pm +++ b/lib/MogileFS/Worker/Reaper.pm @@ -98,16 +98,19 @@ sub reap_dev { } } $sto->release_lock($lock); + + # if we've found any FIDs (perhaps even while backing off) + # ensure we try to find more soon: + if (@fids) { + $delay = REAP_INTERVAL; + } else { + $delay = $self->reap_dev_backoff_delay($delay); + } } else { + # No lock after a long lock_timeout? Try again soon. + # We should never get here under MySQL, and rarely for other DBs. debug("get_lock($lock, $lock_timeout) failed"); - } - - # if we've found any FIDs (perhaps even while backing off) - # ensure we try to find more soon: - if (@fids) { $delay = REAP_INTERVAL; - } else { - $delay = $self->reap_dev_backoff_delay($delay); } } From 208b43f38eeda6f0751b219e94726330f6b997d0 Mon Sep 17 00:00:00 2001 From: "Robin H. Johnson" Date: Fri, 18 May 2012 16:31:03 +0000 Subject: [PATCH 278/405] Postgres advisory lock instead of table-based lock Update Pg locking code to use Postgres advisory locks. Now requires Postgres 8.4 as min version. Signed-off-by: Robin H. Johnson --- lib/MogileFS/Store/Postgres.pm | 38 +++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index ebfed22d..ccbbe893 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -62,7 +62,8 @@ sub init { $self->SUPER::init; my $database_version = $self->dbh->get_info(18); # SQL_DBMS_VER # We need >=pg-8.2 because we use SAVEPOINT and ROLLBACK TO. - die "Postgres is too old! Must use >=postgresql-8.2!" if($database_version =~ /\A0[0-7]\.|08\.0[01]/); + # We need >=pg-8.4 for working advisory locks + die "Postgres is too old! Must use >=postgresql-8.4!" 
if($database_version =~ /\A0[0-7]\.|08\.0[0123]/); $self->{lock_depth} = 0; } @@ -787,27 +788,30 @@ sub lockid { # returns 1 on success and 0 on timeout sub get_lock { my ($self, $lockname, $timeout) = @_; + my $hostid = lockid(hostname); my $lockid = lockid($lockname); - die "Lock recursion detected (grabbing $lockname ($lockid), had $self->{last_lock} (".lockid($self->{last_lock})."). Bailing out." if $self->{lock_depth}; + die "Lock recursion detected (grabbing ".hostname."$lockname ($hostid/$lockid), had $self->{last_lock} (".lockid($self->{last_lock})."). Bailing out." if $self->{lock_depth}; debug("$$ Locking $lockname ($lockid)\n") if $Mgd::DEBUG >= 5; my $lock = undef; - while($timeout >= 0 and not defined($lock)) { - $lock = eval { $self->dbh->do('INSERT INTO lock (lockid,hostname,pid,acquiredat) VALUES (?, ?, ?, '.$self->unix_timestamp().')', undef, $lockid, hostname, $$) }; - if($self->was_duplicate_error) { - $timeout--; - sleep 1 if $timeout > 0; - next; - } + while($timeout >= 0) { + $lock = $self->dbh->selectrow_array("SELECT pg_try_advisory_lock(?, ?)", undef, $hostid, $lockid); $self->condthrow; - #$lock = $self->dbh->selectrow_array("SELECT pg_try_advisory_lock(?, ?)", undef, $lockid, $timeout); - #warn("$$ Lock result=$lock\n"); - if (defined $lock and $lock == 1) { - $self->{lock_depth} = 1; - $self->{last_lock} = $lockname; + if (defined $lock) { + if($lock == 1) { + $self->{lock_depth} = 1; + $self->{last_lock} = $lockname; + last; + } elsif($lock == 0) { + $timeout--; + sleep 1 if $timeout > 0; + next; + } else { + die "Something went horribly wrong while getting lock $lockname - unknown return value"; + } } else { - die "Something went horribly wrong while getting lock $lockname"; + die "Something went horribly wrong while getting lock $lockname - undefined lock"; } } return $lock; @@ -817,10 +821,10 @@ sub get_lock { # returns 1 on success and 0 if no lock we have has that name. 
sub release_lock { my ($self, $lockname) = @_; + my $hostid = lockid(hostname); my $lockid = lockid($lockname); debug("$$ Unlocking $lockname ($lockid)\n") if $Mgd::DEBUG >= 5; - #my $rv = $self->dbh->selectrow_array("SELECT pg_advisory_unlock(?)", undef, $lockid); - my $rv = $self->dbh->do('DELETE FROM lock WHERE lockid=? AND pid=? AND hostname=?', undef, $lockid, $$, hostname); + my $rv = $self->dbh->selectrow_array("SELECT pg_advisory_unlock(?, ?)", undef, $hostid, $lockid); debug("Double-release of lock $lockname!") if $self->{lock_depth} != 0 and $rv == 0 and $Mgd::DEBUG >= 2; $self->condthrow; $self->{lock_depth} = 0; From 21a66942fde3bb4f9e5ee24dac787d3c9ebbb41f Mon Sep 17 00:00:00 2001 From: "Robin H. Johnson" Date: Sat, 19 May 2012 16:46:08 +0000 Subject: [PATCH 279/405] Cleanup lock timeout sleep location per Eric. Signed-off-by: Robin H. Johnson --- lib/MogileFS/Store/Postgres.pm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index ccbbe893..09207f18 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -790,7 +790,7 @@ sub get_lock { my ($self, $lockname, $timeout) = @_; my $hostid = lockid(hostname); my $lockid = lockid($lockname); - die "Lock recursion detected (grabbing ".hostname."$lockname ($hostid/$lockid), had $self->{last_lock} (".lockid($self->{last_lock})."). Bailing out." if $self->{lock_depth}; + die sprintf("Lock recursion detected (grabbing %s on %s (%s/%s), had %s (%s). 
Bailing out.", $lockname, hostname, $hostid, $lockid, $self->{last_lock}, lockid($self->{last_lock})) if $self->{lock_depth}; debug("$$ Locking $lockname ($lockid)\n") if $Mgd::DEBUG >= 5; @@ -804,8 +804,8 @@ sub get_lock { $self->{last_lock} = $lockname; last; } elsif($lock == 0) { - $timeout--; sleep 1 if $timeout > 0; + $timeout--; next; } else { die "Something went horribly wrong while getting lock $lockname - unknown return value"; From fae96638ab8d7f385fc9ab14577ce4cbe7fc9835 Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 13 Aug 2012 17:55:43 -0700 Subject: [PATCH 280/405] Checking in changes prior to tagging of version 2.65. Changelog diff is: diff --git a/CHANGES b/CHANGES index c552089..64455f4 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,28 @@ +2012-08-13: Release version 2.65 + + * Postgres advisory lock instead of table-based lock (Robin H. Johnson ) + Now requires minimum Postgres version of pg8.4. + + * reaper: switch to Danga::Socket for scheduling (Eric Wong ) + + * reaper: add queue_size_for_reaper server setting (Eric Wong ) + + * reaper: add "queue_rate_for_reaper" server setting (Eric Wong ) + defaults to 1000, same as previously. + + * reaper: global lock around DB interaction (Eric Wong ) + prevents reapers clobbering each other, causing a reduction in DB writes. + + * tests: add basic test for reaper (Eric Wong ) + + * fix tests when /etc/mogilefs/mogstored.conf exists (Eric Wong ) + + * iostat: increase flexibility of iostat parser (Eric Wong ) + + * iostat: allow MOG_IOSTAT_CMD env override (Eric Wong ) + + * When a mogstored is down, die with a more informative message. 
(Dave Lambley ) + 2012-06-21: Release version 2.64 * Delete memcache data when we replicate fids (Pyry Hakulinen ) --- CHANGES | 25 +++++++++++++++++++++++++ lib/MogileFS/Server.pm | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index c5520898..64455f42 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,28 @@ +2012-08-13: Release version 2.65 + + * Postgres advisory lock instead of table-based lock (Robin H. Johnson ) + Now requires minimum Postgres version of pg8.4. + + * reaper: switch to Danga::Socket for scheduling (Eric Wong ) + + * reaper: add queue_size_for_reaper server setting (Eric Wong ) + + * reaper: add "queue_rate_for_reaper" server setting (Eric Wong ) + defaults to 1000, same as previously. + + * reaper: global lock around DB interaction (Eric Wong ) + prevents reapers clobbering each other, causing a reduction in DB writes. + + * tests: add basic test for reaper (Eric Wong ) + + * fix tests when /etc/mogilefs/mogstored.conf exists (Eric Wong ) + + * iostat: increase flexibility of iostat parser (Eric Wong ) + + * iostat: allow MOG_IOSTAT_CMD env override (Eric Wong ) + + * When a mogstored is down, die with a more informative message. 
(Dave Lambley ) + 2012-06-21: Release version 2.64 * Delete memcache data when we replicate fids (Pyry Hakulinen ) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 8ad2dcfb..6d0d4c8f 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.64"; +$VERSION = "2.65"; =head1 NAME From 823beccfd97529c8b1e7c7ebf2461710c335570e Mon Sep 17 00:00:00 2001 From: Daniel Frett Date: Sat, 3 Nov 2012 00:21:39 -0400 Subject: [PATCH 281/405] [#58] load the latest version of the nginx module --- lib/Mogstored/HTTPServer/Nginx.pm | 125 ++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 lib/Mogstored/HTTPServer/Nginx.pm diff --git a/lib/Mogstored/HTTPServer/Nginx.pm b/lib/Mogstored/HTTPServer/Nginx.pm new file mode 100644 index 00000000..5759295d --- /dev/null +++ b/lib/Mogstored/HTTPServer/Nginx.pm @@ -0,0 +1,125 @@ +package Mogstored::HTTPServer::Nginx; +use strict; +use base 'Mogstored::HTTPServer'; +use File::Temp (); +my $nginxpidfile; + +sub start { + + my $self = shift; + my $exe = $self->{bin}; + + if ($exe && ! -x $exe) { + die "Provided nginx path $exe not valid.\n"; + } + unless ($exe) { + my @loc = qw(/usr/sbin/nginx + /usr/local/bin/nginx + /usr/bin/nginx + ); + foreach my $loc (@loc) { + $exe = $loc; + last if -x $exe; + } + unless (-x $exe) { + die "Can't find nginx in @loc\n"; + } + } + + $nginxpidfile = "/var/run/nginx.pid"; + + my $nginxpid = _getpid(); + # TODO: Support reloading of nginx instead? 
+ if ($nginxpid) { + my $killed = kill 15,$nginxpid; + if ($killed > 0) { + print "Killed nginx on PID # $nginxpid"; + } + } + + my ($fh, $filename) = File::Temp::tempfile(); + $self->{temp_conf_file} = $filename; + + my $portnum = $self->listen_port; + my $bind_ip = $self->bind_ip; + + my $client_max_body_size = "0"; + $client_max_body_size = $self->{client_max_body_size} + if $self->{client_max_body_size}; + + # TODO: Pull from config file? + #print "client_max_body_size = $client_max_body_size\n"; + + my @devdirs = _disks($self->{docroot}); + my $devsection = ''; + + foreach my $devid (@devdirs) { + my $devseg = qq{ + location /dev$devid { + root $self->{docroot}; + client_body_temp_path $self->{docroot}/dev$devid/.tmp; + dav_methods put delete; + dav_access user:rw group:rw all:r; + create_full_put_path on; + } + }; + $devsection = $devsection . $devseg; + } + + print $fh qq{ +worker_processes 15; +events { + worker_connections 1024; +} +http { + default_type application/octet-stream; + sendfile on; + keepalive_timeout 0; + tcp_nodelay on; + client_max_body_size $client_max_body_size; + server_tokens off; + access_log /var/log/nginx/mogile-access.log; + error_log /var/log/nginx/mogile-error.log; + server { + listen $bind_ip:$portnum; + charset utf-8; + $devsection + location / { + autoindex on; + root $self->{docroot}; + } + error_page 500 502 503 504 /50x.html; + location /50x.html { + root html; + } + } +} +}; + + close $fh; + system $exe, "-c", $filename; + + return 1; +} + +sub _disks { + my $root = shift; + opendir(my $dh, $root) or die "Failed to open docroot: $root: $!"; + return scalar grep { /^dev\d+$/ } readdir($dh); +} + +sub _getpid { + local $/ = undef; + open FILE, $nginxpidfile or return; + binmode FILE; + my $string = ; + close FILE; + return $string; +} + +sub DESTROY { + my $self = shift; + unlink $self->{temp_conf_file} if $self->{temp_conf_file}; +} + +1; From 7205d52b92f05161784a28db351dab281eff6048 Mon Sep 17 00:00:00 2001 From: Daniel 
Frett Date: Sat, 3 Nov 2012 00:52:17 -0400 Subject: [PATCH 282/405] [#58] load the Nginx server file so it can be used --- mogstored | 1 + 1 file changed, 1 insertion(+) diff --git a/mogstored b/mogstored index 11e3c44f..543a8c8e 100755 --- a/mogstored +++ b/mogstored @@ -23,6 +23,7 @@ use Mogstored::HTTPServer::Perlbal; use Mogstored::HTTPServer::Lighttpd; use Mogstored::HTTPServer::None; use Mogstored::HTTPServer::Apache; +use Mogstored::HTTPServer::Nginx; use Mogstored::SideChannelListener; use Mogstored::SideChannelClient; From f8cfff6e2d7427c00bd62ce052b3fe7de9f64bd0 Mon Sep 17 00:00:00 2001 From: Daniel Frett Date: Sat, 3 Nov 2012 00:53:03 -0400 Subject: [PATCH 283/405] [#58] fix the code generating sections for each device --- lib/Mogstored/HTTPServer/Nginx.pm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Mogstored/HTTPServer/Nginx.pm b/lib/Mogstored/HTTPServer/Nginx.pm index 5759295d..f50d69a9 100644 --- a/lib/Mogstored/HTTPServer/Nginx.pm +++ b/lib/Mogstored/HTTPServer/Nginx.pm @@ -55,9 +55,9 @@ sub start { foreach my $devid (@devdirs) { my $devseg = qq{ - location /dev$devid { + location /$devid { root $self->{docroot}; - client_body_temp_path $self->{docroot}/dev$devid/.tmp; + client_body_temp_path $self->{docroot}/$devid/.tmp; dav_methods put delete; dav_access user:rw group:rw all:r; create_full_put_path on; @@ -105,7 +105,7 @@ http { sub _disks { my $root = shift; opendir(my $dh, $root) or die "Failed to open docroot: $root: $!"; - return scalar grep { /^dev\d+$/ } readdir($dh); + return grep { /^dev\d+$/ } readdir($dh); } sub _getpid { From 33fef38853dcdc2e8ee45dfc5bd2cbd91d5ca61b Mon Sep 17 00:00:00 2001 From: Daniel Frett Date: Sat, 3 Nov 2012 01:13:15 -0400 Subject: [PATCH 284/405] [#58] disable logging and move the pid to the data docroot to make nginx backend less architecture dependent --- lib/Mogstored/HTTPServer/Nginx.pm | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git 
a/lib/Mogstored/HTTPServer/Nginx.pm b/lib/Mogstored/HTTPServer/Nginx.pm index f50d69a9..24804155 100644 --- a/lib/Mogstored/HTTPServer/Nginx.pm +++ b/lib/Mogstored/HTTPServer/Nginx.pm @@ -26,7 +26,7 @@ sub start { } } - $nginxpidfile = "/var/run/nginx.pid"; + $nginxpidfile = $self->{docroot} . "/nginx.pid"; my $nginxpid = _getpid(); # TODO: Support reloading of nginx instead? @@ -67,6 +67,7 @@ sub start { } print $fh qq{ +pid $nginxpidfile; worker_processes 15; events { worker_connections 1024; @@ -78,8 +79,8 @@ http { tcp_nodelay on; client_max_body_size $client_max_body_size; server_tokens off; - access_log /var/log/nginx/mogile-access.log; - error_log /var/log/nginx/mogile-error.log; + access_log off; + error_log /dev/null crit; server { listen $bind_ip:$portnum; charset utf-8; From 9424a0472d96357e80b66862290ea3ac50f1688f Mon Sep 17 00:00:00 2001 From: Daniel Frett Date: Mon, 12 Nov 2012 10:05:48 -0500 Subject: [PATCH 285/405] [#58] remove a couple unnecessary configuration directives per Gernot's recommendation tcp_nodelay defaults to on, so there is no need to specify it remove unnecessary error_page config, there is no need for pretty error pages --- lib/Mogstored/HTTPServer/Nginx.pm | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lib/Mogstored/HTTPServer/Nginx.pm b/lib/Mogstored/HTTPServer/Nginx.pm index 24804155..4dbf6a37 100644 --- a/lib/Mogstored/HTTPServer/Nginx.pm +++ b/lib/Mogstored/HTTPServer/Nginx.pm @@ -76,7 +76,6 @@ http { default_type application/octet-stream; sendfile on; keepalive_timeout 0; - tcp_nodelay on; client_max_body_size $client_max_body_size; server_tokens off; access_log off; @@ -89,10 +88,6 @@ http { autoindex on; root $self->{docroot}; } - error_page 500 502 503 504 /50x.html; - location /50x.html { - root html; - } } } }; From bc37655615f9408ea3fda561ee5cd943077234db Mon Sep 17 00:00:00 2001 From: Daniel Frett Date: Mon, 12 Nov 2012 10:44:45 -0500 Subject: [PATCH 286/405] [#58] only specify the root once in the server 
directive instead of for each configured location --- lib/Mogstored/HTTPServer/Nginx.pm | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/Mogstored/HTTPServer/Nginx.pm b/lib/Mogstored/HTTPServer/Nginx.pm index 4dbf6a37..f7794876 100644 --- a/lib/Mogstored/HTTPServer/Nginx.pm +++ b/lib/Mogstored/HTTPServer/Nginx.pm @@ -56,7 +56,6 @@ sub start { foreach my $devid (@devdirs) { my $devseg = qq{ location /$devid { - root $self->{docroot}; client_body_temp_path $self->{docroot}/$devid/.tmp; dav_methods put delete; dav_access user:rw group:rw all:r; @@ -82,11 +81,11 @@ http { error_log /dev/null crit; server { listen $bind_ip:$portnum; + root $self->{docroot}; charset utf-8; $devsection location / { autoindex on; - root $self->{docroot}; } } } From d69de204198032a81145d8cf552c5e9267a394a0 Mon Sep 17 00:00:00 2001 From: Daniel Frett Date: Mon, 12 Nov 2012 14:01:24 -0500 Subject: [PATCH 287/405] [#58] relocate the prefix directory to keep nginx from conflicting with other running copies. Thanks Gernot for the heads up --- lib/Mogstored/HTTPServer/Nginx.pm | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/Mogstored/HTTPServer/Nginx.pm b/lib/Mogstored/HTTPServer/Nginx.pm index f7794876..7dfd3209 100644 --- a/lib/Mogstored/HTTPServer/Nginx.pm +++ b/lib/Mogstored/HTTPServer/Nginx.pm @@ -26,6 +26,7 @@ sub start { } } + my $prefixDir = $self->{docroot} . '/.tmp'; $nginxpidfile = $self->{docroot} . 
"/nginx.pid"; my $nginxpid = _getpid(); @@ -68,6 +69,7 @@ sub start { print $fh qq{ pid $nginxpidfile; worker_processes 15; +error_log /dev/null crit; events { worker_connections 1024; } @@ -78,21 +80,26 @@ http { client_max_body_size $client_max_body_size; server_tokens off; access_log off; - error_log /dev/null crit; server { listen $bind_ip:$portnum; root $self->{docroot}; charset utf-8; $devsection + location /.tmp { + deny all; + } location / { autoindex on; } } } }; - close $fh; - system $exe, "-c", $filename; + + # create prefix directory and start server + mkdir $prefixDir; + mkdir $prefixDir.'/logs'; + system $exe, '-p', $prefixDir, "-c", $filename; return 1; } From 0d6e7cd9378959e753a23328f7ec3668f56fdc85 Mon Sep 17 00:00:00 2001 From: Daniel Frett Date: Mon, 12 Nov 2012 14:02:39 -0500 Subject: [PATCH 288/405] [#58] store the nginx pid in the prefix dir and reduce the scope of the variable --- lib/Mogstored/HTTPServer/Nginx.pm | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/Mogstored/HTTPServer/Nginx.pm b/lib/Mogstored/HTTPServer/Nginx.pm index 7dfd3209..be1cfb25 100644 --- a/lib/Mogstored/HTTPServer/Nginx.pm +++ b/lib/Mogstored/HTTPServer/Nginx.pm @@ -1,11 +1,10 @@ package Mogstored::HTTPServer::Nginx; + use strict; use base 'Mogstored::HTTPServer'; use File::Temp (); -my $nginxpidfile; sub start { - my $self = shift; my $exe = $self->{bin}; @@ -27,9 +26,9 @@ sub start { } my $prefixDir = $self->{docroot} . '/.tmp'; - $nginxpidfile = $self->{docroot} . "/nginx.pid"; + my $nginxpidfile = $prefixDir . '/nginx.pid'; - my $nginxpid = _getpid(); + my $nginxpid = _getpid($nginxpidfile); # TODO: Support reloading of nginx instead? 
if ($nginxpid) { my $killed = kill 15,$nginxpid; @@ -111,6 +110,7 @@ sub _disks { } sub _getpid { + my ($nginxpidfile) = @_; local $/ = undef; open FILE, $nginxpidfile or return; binmode FILE; From 88a39f3ae9dd7141e9ea6ccd88cc9a3c63951fc0 Mon Sep 17 00:00:00 2001 From: Daniel Frett Date: Mon, 12 Nov 2012 14:34:38 -0500 Subject: [PATCH 289/405] clean up formatting, no functional changes --- lib/Mogstored/HTTPServer/Nginx.pm | 91 ++++++++++++++++--------------- 1 file changed, 46 insertions(+), 45 deletions(-) diff --git a/lib/Mogstored/HTTPServer/Nginx.pm b/lib/Mogstored/HTTPServer/Nginx.pm index be1cfb25..e3fd70c9 100644 --- a/lib/Mogstored/HTTPServer/Nginx.pm +++ b/lib/Mogstored/HTTPServer/Nginx.pm @@ -33,7 +33,7 @@ sub start { if ($nginxpid) { my $killed = kill 15,$nginxpid; if ($killed > 0) { - print "Killed nginx on PID # $nginxpid"; + print "Killed nginx on PID # $nginxpid"; } } @@ -54,53 +54,54 @@ sub start { my $devsection = ''; foreach my $devid (@devdirs) { - my $devseg = qq{ - location /$devid { - client_body_temp_path $self->{docroot}/$devid/.tmp; - dav_methods put delete; - dav_access user:rw group:rw all:r; - create_full_put_path on; - } - }; - $devsection = $devsection . 
$devseg; + my $devseg = qq{ + location /$devid { + client_body_temp_path $self->{docroot}/$devid/.tmp; + dav_methods put delete; + dav_access user:rw group:rw all:r; + create_full_put_path on; + } + }; + $devsection .= $devseg; } - + print $fh qq{ -pid $nginxpidfile; -worker_processes 15; -error_log /dev/null crit; -events { - worker_connections 1024; -} -http { - default_type application/octet-stream; - sendfile on; - keepalive_timeout 0; - client_max_body_size $client_max_body_size; - server_tokens off; - access_log off; - server { - listen $bind_ip:$portnum; - root $self->{docroot}; - charset utf-8; - $devsection - location /.tmp { - deny all; + pid $nginxpidfile; + worker_processes 15; + error_log /dev/null crit; + events { + worker_connections 1024; } - location / { - autoindex on; + http { + default_type application/octet-stream; + sendfile on; + keepalive_timeout 0; + client_max_body_size $client_max_body_size; + server_tokens off; + access_log off; + server { + listen $bind_ip:$portnum; + root $self->{docroot}; + charset utf-8; + + $devsection + location /.tmp { + deny all; + } + location / { + autoindex on; + } + } } - } -} -}; - close $fh; + }; + close $fh; # create prefix directory and start server mkdir $prefixDir; mkdir $prefixDir.'/logs'; - system $exe, '-p', $prefixDir, "-c", $filename; + system $exe, '-p', $prefixDir, '-c', $filename; - return 1; + return 1; } sub _disks { @@ -111,12 +112,12 @@ sub _disks { sub _getpid { my ($nginxpidfile) = @_; - local $/ = undef; - open FILE, $nginxpidfile or return; - binmode FILE; - my $string = ; - close FILE; - return $string; + local $/ = undef; + open FILE, $nginxpidfile or return; + binmode FILE; + my $string = ; + close FILE; + return $string; } sub DESTROY { From 7904b3813f98f27cf9bec69a75bc4b99859a7743 Mon Sep 17 00:00:00 2001 From: Daniel Frett Date: Tue, 13 Nov 2012 10:52:03 -0500 Subject: [PATCH 290/405] [#58] die if nginx fails to start --- lib/Mogstored/HTTPServer/Nginx.pm | 5 ++++- 1 file changed, 
4 insertions(+), 1 deletion(-) diff --git a/lib/Mogstored/HTTPServer/Nginx.pm b/lib/Mogstored/HTTPServer/Nginx.pm index e3fd70c9..8bddbd0e 100644 --- a/lib/Mogstored/HTTPServer/Nginx.pm +++ b/lib/Mogstored/HTTPServer/Nginx.pm @@ -99,7 +99,10 @@ sub start { # create prefix directory and start server mkdir $prefixDir; mkdir $prefixDir.'/logs'; - system $exe, '-p', $prefixDir, '-c', $filename; + my $retval = system $exe, '-p', $prefixDir, '-c', $filename; + + # throw an error if nginx failed to start + die "nginx failed to start\n" if($retval != 0); return 1; } From d31497a9aa90c4265c8f032d7c9b9848a4a769d6 Mon Sep 17 00:00:00 2001 From: Daniel Frett Date: Tue, 13 Nov 2012 12:02:51 -0500 Subject: [PATCH 291/405] [#58] utilize non-daemon mode for nginx >= 1.0.9 --- lib/Mogstored/HTTPServer/Nginx.pm | 66 +++++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 17 deletions(-) diff --git a/lib/Mogstored/HTTPServer/Nginx.pm b/lib/Mogstored/HTTPServer/Nginx.pm index 8bddbd0e..adfe90fa 100644 --- a/lib/Mogstored/HTTPServer/Nginx.pm +++ b/lib/Mogstored/HTTPServer/Nginx.pm @@ -25,15 +25,45 @@ sub start { } } - my $prefixDir = $self->{docroot} . '/.tmp'; - my $nginxpidfile = $prefixDir . '/nginx.pid'; - - my $nginxpid = _getpid($nginxpidfile); - # TODO: Support reloading of nginx instead? 
- if ($nginxpid) { - my $killed = kill 15,$nginxpid; - if ($killed > 0) { - print "Killed nginx on PID # $nginxpid"; + # get meta-data about nginx binary + my $nginxMeta = `$exe -V 2>&1`; + my @version = (0,0,0); + if($nginxMeta =~ /nginx\/(\d+)\.(\d+)\.(\d+)/sog) { + @version = ($1,$2,$3); + } + + # determine if nginx can be run in non-daemon mode, supported in $version >= 1.0.9 (non-daemon provides better shutdown/crash support) + # See: http://nginx.org/en/docs/faq/daemon_master_process_off.html + my $nondaemon = $version[0] > 1 || ($version[0] == 1 && $version[1] > 0) || ($version[0] == 1 && $version[1] == 0 && $version[2] >= 9); + + # create tmp directory + my $tmpDir = $self->{docroot} . '/.tmp'; + mkdir $tmpDir; + + my $pidFile = $tmpDir . '/nginx.pid'; + + # fork if nginx supports non-daemon mode + if($nondaemon) { + my $pid = fork(); + die "Can't fork: $!" unless defined $pid; + + if ($pid) { + $self->{pid} = $pid; + Mogstored->on_pid_death($pid => sub { + die "nginx died"; + }); + return; + } + } + # otherwise, try killing previous instance of nginx + else { + my $nginxpid = _getpid($pidFile); + # TODO: Support reloading of nginx instead? 
+ if ($nginxpid) { + my $killed = kill 15,$nginxpid; + if ($killed > 0) { + print "Killed nginx on PID # $nginxpid"; + } } } @@ -66,7 +96,7 @@ sub start { } print $fh qq{ - pid $nginxpidfile; + pid $pidFile; worker_processes 15; error_log /dev/null crit; events { @@ -96,13 +126,15 @@ sub start { }; close $fh; - # create prefix directory and start server - mkdir $prefixDir; - mkdir $prefixDir.'/logs'; - my $retval = system $exe, '-p', $prefixDir, '-c', $filename; - - # throw an error if nginx failed to start - die "nginx failed to start\n" if($retval != 0); + # start nginx + if($nondaemon) { + exec $exe, '-g', 'daemon off;', '-c', $filename; + exit; + } + else { + my $retval = system $exe, '-c', $filename; + die "nginx failed to start\n" if($retval != 0); + } return 1; } From 15100882679ff5fa25501a285d0c6f32a0f2f4a8 Mon Sep 17 00:00:00 2001 From: Daniel Frett Date: Tue, 13 Nov 2012 13:41:12 -0500 Subject: [PATCH 292/405] [#58] relocate all temp_path's to a temp path specific to mogstored this attempts to prevent conflicts with other running instances of nginx --- lib/Mogstored/HTTPServer/Nginx.pm | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/lib/Mogstored/HTTPServer/Nginx.pm b/lib/Mogstored/HTTPServer/Nginx.pm index adfe90fa..6b7402d0 100644 --- a/lib/Mogstored/HTTPServer/Nginx.pm +++ b/lib/Mogstored/HTTPServer/Nginx.pm @@ -39,6 +39,7 @@ sub start { # create tmp directory my $tmpDir = $self->{docroot} . '/.tmp'; mkdir $tmpDir; + mkdir $tmpDir.'/logs'; my $pidFile = $tmpDir . 
'/nginx.pid'; @@ -95,6 +96,21 @@ sub start { $devsection .= $devseg; } + # determine which temp_path directives are required to isolate this instance of nginx + my $tempPath = "client_body_temp_path $tmpDir/client_body_temp;\n"; + unless($nginxMeta =~ /--without-http_fastcgi_module/sog) { + $tempPath .= "fastcgi_temp_path $tmpDir/fastcgi_temp;\n"; + } + unless($nginxMeta =~ /--without-http_proxy_module/sog) { + $tempPath .= "proxy_temp_path $tmpDir/proxy_temp;\n"; + } + unless($nginxMeta =~ /--without-http_uwsgi_module/sog) { + $tempPath .= "uwsgi_temp_path $tmpDir/uwsgi_temp;\n"; + } + unless($nginxMeta =~ /--without-http_scgi_module/sog) { + $tempPath .= "scgi_temp_path $tmpDir/scgi_temp;\n"; + } + print $fh qq{ pid $pidFile; worker_processes 15; @@ -122,17 +138,21 @@ sub start { autoindex on; } } + + $tempPath } + + lock_file $tmpDir/lock_file; }; close $fh; # start nginx if($nondaemon) { - exec $exe, '-g', 'daemon off;', '-c', $filename; + exec $exe, '-p', $tmpDir, '-g', 'daemon off;', '-c', $filename; exit; } else { - my $retval = system $exe, '-c', $filename; + my $retval = system $exe, '-p', $tmpDir, '-c', $filename; die "nginx failed to start\n" if($retval != 0); } From 5a635e608f460919f3f4c64f57488ed2b57be624 Mon Sep 17 00:00:00 2001 From: Daniel Frett Date: Tue, 13 Nov 2012 14:14:20 -0500 Subject: [PATCH 293/405] [#58] support nginx server type in command line options --- mogstored | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mogstored b/mogstored index 543a8c8e..608d786b 100755 --- a/mogstored +++ b/mogstored @@ -69,8 +69,8 @@ my %config_opts = ( ); usage() unless Getopt::Long::GetOptions(%config_opts); -die "Unknown server type. Valid options: --server={perlbal,lighttpd,apache,none}" - unless $server =~ /^perlbal|lighttpd|apache|none$/; +die "Unknown server type. Valid options: --server={perlbal,lighttpd,apache,nginx,none}" + unless $server =~ /^perlbal|lighttpd|apache|nginx|none$/; $opt_config = $default_config if ! 
$opt_config && -e $default_config; load_config_file($opt_config => \%config_opts) if $opt_config && !$opt_skipconfig; From 02ed229e72a6d3589d9f84a076d3e310707cfc32 Mon Sep 17 00:00:00 2001 From: Gernot Vormayr Date: Sun, 23 Dec 2012 23:20:48 +0100 Subject: [PATCH 294/405] Moved utf-8 config to http block --- lib/Mogstored/HTTPServer/Nginx.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Mogstored/HTTPServer/Nginx.pm b/lib/Mogstored/HTTPServer/Nginx.pm index 6b7402d0..b1782b90 100644 --- a/lib/Mogstored/HTTPServer/Nginx.pm +++ b/lib/Mogstored/HTTPServer/Nginx.pm @@ -125,10 +125,10 @@ sub start { client_max_body_size $client_max_body_size; server_tokens off; access_log off; + charset utf-8; server { listen $bind_ip:$portnum; root $self->{docroot}; - charset utf-8; $devsection location /.tmp { From db1187d9ee81c894b4da1eaabbac04c7c6cc7c1a Mon Sep 17 00:00:00 2001 From: Gernot Vormayr Date: Sun, 23 Dec 2012 23:33:48 +0100 Subject: [PATCH 295/405] if one really wants to be root - let him be --- lib/Mogstored/HTTPServer/Nginx.pm | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/Mogstored/HTTPServer/Nginx.pm b/lib/Mogstored/HTTPServer/Nginx.pm index b1782b90..29642d3d 100644 --- a/lib/Mogstored/HTTPServer/Nginx.pm +++ b/lib/Mogstored/HTTPServer/Nginx.pm @@ -111,10 +111,13 @@ sub start { $tempPath .= "scgi_temp_path $tmpDir/scgi_temp;\n"; } + my ($user) = $> == 1 ? "user root root;" : ""; + print $fh qq{ pid $pidFile; worker_processes 15; error_log /dev/null crit; + $user events { worker_connections 1024; } From 7c89846c9119efddb29e99665dc9e483f4831ef7 Mon Sep 17 00:00:00 2001 From: Pyry Hakulinen Date: Wed, 17 Oct 2012 21:28:16 +0300 Subject: [PATCH 296/405] Fix "skip_devcount" during rebalance We were updating devcount field even when skip_devcount was true. We should not use $sto here because we already have FID object and nice method available for this. 
Signed-off-by: Eric Wong --- lib/MogileFS/DevFID.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/DevFID.pm b/lib/MogileFS/DevFID.pm index 92a798cf..b004bb07 100644 --- a/lib/MogileFS/DevFID.pm +++ b/lib/MogileFS/DevFID.pm @@ -170,7 +170,7 @@ sub destroy { my $sto = Mgd::get_store(); $sto->remove_fidid_from_devid($self->fidid, $self->devid); - $sto->update_devcount($self->fidid); + $self->fid->update_devcount(no_lock => 1); } 1; From 7bc3c354d4c3044b1ef38d27f38625b716f26ba3 Mon Sep 17 00:00:00 2001 From: Pyry Hakulinen Date: Thu, 1 Nov 2012 06:31:35 -0700 Subject: [PATCH 297/405] fix use_dest_devs for rebalance The caller expects an array ref, currently using use_dest_devs will kill JobMaster with: Oct 18 09:45:25 storage22 mogilefsd[23263]: crash log: rebalance cannot find suitable destination devices at /usr/local/share/perl/5.10.1/MogileFS/Worker/JobMaster.pm line 233 Oct 18 09:45:26 storage22 mogilefsd[22044]: Child 23263 (job_master) died: 256 (UNEXPECTED) Signed-off-by: Eric Wong --- lib/MogileFS/Rebalance.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Rebalance.pm b/lib/MogileFS/Rebalance.pm index 1f928fa5..9a17ca9b 100644 --- a/lib/MogileFS/Rebalance.pm +++ b/lib/MogileFS/Rebalance.pm @@ -309,7 +309,7 @@ sub _choose_dest_devs { my @shuffled_devs = List::Util::shuffle(@$filtered_devs); return \@shuffled_devs if ($p->{use_dest_devs} eq 'all'); - return splice @shuffled_devs, 0, $p->{use_dest_devs}; + return [splice @shuffled_devs, 0, $p->{use_dest_devs}]; } # Iterate through all possible constraints until we have a final list. 
From ff6ac2c04ee51ce7ca0ad562bc8d4eb66a78aade Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 17 Jul 2012 11:21:22 +0000 Subject: [PATCH 298/405] prevent reqid mismatches (and queryworker death) On certain errors, the queryworker may send two "ERR" responses, causing the ProcManager to terminate the queryworker upon reading the second response if the queryworker is immediately fed another query. This can affect busy setups, but is also easy to reproduce with a single queryworker that's receiving a pipelined request to an invalid/non-existent domain: ( printf 'list_keys domain=\r\nlist_keys domain=\r\n' sleep 2 ) | socat - TCP:127.0.0.1:7001 The queryworker strace will look like this (writing 4 lines): write(14, "4981-1 0.0005 ERR no_domain No+domain+provided\r\n", 48) = 48 write(14, "4981-1 ERR domain_not_found Domain+not+found\r\n", 46) = 46 write(14, "4981-2 0.0005 ERR no_domain No+domain+provided\r\n", 48) = 48 write(14, "4981-2 ERR domain_not_found Domain+not+found\r\n", 46) = 46 And a message like this will appear for "!watch" users: Worker responded with id (line: [4981-1 ERR domain_not_found Domain+not+found]), but expected id 4981-2, killing This is because ProgManager immediately calls NoteIdleQueryWorker upon writing the first ERR response to the client (at the end of HandleQueryWorkerResponse). This means the idle query worker may immediately start processing a second request before the ProcManager has a chance to process the second ERR response line (from the first request). Preventing err_line() from calling send_to_parent() with "ERR" if querystarttime is undef prevents this issue, but there may be better ways to fix this bug. A similar, preventative fix may be appropriate for ok_line(). 
--- lib/MogileFS/Worker/Query.pm | 3 +++ t/00-startup.t | 25 +++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 4c5f252f..f88fdfee 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -1783,6 +1783,9 @@ sub err_line { if ($self->{querystarttime}) { $delay = sprintf("%.4f ", Time::HiRes::tv_interval($self->{querystarttime})); $self->{querystarttime} = undef; + } else { + # don't send another ERR line if we already sent one + return 0; } my $id = defined $self->{reqid} ? "$self->{reqid} " : ''; diff --git a/t/00-startup.t b/t/00-startup.t index 5aba5b9d..d602b5cd 100644 --- a/t/00-startup.t +++ b/t/00-startup.t @@ -82,6 +82,31 @@ ok($be->do_request("test", {}), "test ping worked"); ok(!$be->do_request("test", {crash => 1}), "crash didn't"); ok($be->do_request("test", {}), "test ping again worked"); +{ + my $c = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001', Timeout => 3); + $c->syswrite("!want 1 queryworker\r\n"); + my $res1 = <$c> . <$c>; + like($res1, qr/Now desiring 1 children doing 'queryworker'/, "set 1 queryworker"); + + my $expect = "ERR no_domain No+domain+provided\r\n" x 2; + + # bad domain won't return twice + my $cmd = "list_keys domain=\r\n"; + $c->syswrite($cmd x 2); + my $r; + my $resp = ""; + do { + $r = $c->sysread(my $buf, 500); + if (defined $r && $r > 0) { + $resp .= $buf; + } + } while ($r && length($resp) != length($expect)); + is($resp, $expect, "response matches expected"); + + $c->syswrite("!want 2 queryworker\r\n"); + my $res2 = <$c> . 
<$c>; + like($res2, qr/Now desiring 2 children doing 'queryworker'/, "restored 2 queryworkers"); +} ok($tmptrack->mogadm("domain", "add", "todie"), "created todie domain"); ok($tmptrack->mogadm("domain", "delete", "todie"), "delete todie domain"); From 3cebc8759a22692c37061ef7866545fa7fe7d516 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 6 Nov 2012 23:45:19 +0000 Subject: [PATCH 299/405] test: expose try_for() as a common test function This saves us from reinventing it in every test and will help us detect stuck tests more easily. --- lib/MogileFS/Test.pm | 11 ++++++++++- t/00-startup.t | 9 --------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/MogileFS/Test.pm b/lib/MogileFS/Test.pm index 70194272..4270268b 100644 --- a/lib/MogileFS/Test.pm +++ b/lib/MogileFS/Test.pm @@ -9,7 +9,7 @@ use IO::Socket::INET; use MogileFS::Server; use base 'Exporter'; -our @EXPORT = qw(&find_mogclient_or_skip &temp_store &create_mogstored &create_temp_tracker); +our @EXPORT = qw(&find_mogclient_or_skip &temp_store &create_mogstored &create_temp_tracker &try_for); sub find_mogclient_or_skip { @@ -152,6 +152,15 @@ sub create_mogstored { return undef; } +sub try_for { + my ($tries, $code) = @_; + for (1..$tries) { + return 1 if $code->(); + sleep 1; + } + return 0; +} + ############################################################################ package ProcessHandle; sub new { diff --git a/t/00-startup.t b/t/00-startup.t index d602b5cd..263a185f 100644 --- a/t/00-startup.t +++ b/t/00-startup.t @@ -392,13 +392,4 @@ foreach my $t (qw(file file_on file_to_delete)) { is($info->{fid}, $opts->{fid}, "explicit fid is correctly set"); } -sub try_for { - my ($tries, $code) = @_; - for (1..$tries) { - return 1 if $code->(); - sleep 1; - } - return 0; -} - done_testing(); From 3be61cfe3d2d88b5fb633d157f4e15be8fa2c13d Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 7 Nov 2012 00:18:23 +0000 Subject: [PATCH 300/405] t/50-checksum.t: use common try_for() function I've 
had this test get stuck intermittently in different places, this should make it easier to track down stuck tests. --- t/50-checksum.t | 72 ++++++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 33 deletions(-) diff --git a/t/50-checksum.t b/t/50-checksum.t index e32487b7..b6719241 100644 --- a/t/50-checksum.t +++ b/t/50-checksum.t @@ -3,7 +3,6 @@ use strict; use warnings; use Test::More; use FindBin qw($Bin); -use Time::HiRes qw(sleep); use MogileFS::Server; use MogileFS::Test; use HTTP::Request; @@ -30,11 +29,10 @@ ok($ms1, "got mogstored1"); my $ms2 = create_mogstored("127.0.1.2", $mogroot{2}); ok($ms1, "got mogstored2"); -while (! -e "$mogroot{1}/dev1/usage" && - ! -e "$mogroot{2}/dev2/usage") { +try_for(30, sub { print "Waiting on usage...\n"; - sleep(.25); -} + -e "$mogroot{1}/dev1/usage" && -e "$mogroot{2}/dev2/usage"; +}); my $tmptrack = create_temp_tracker($sto); ok($tmptrack); @@ -175,9 +173,10 @@ use Digest::MD5 qw/md5_hex/; ok(<$admin> =~ /Now desiring/ && <$admin> eq ".\r\n", "enabled replicate"); # wait for replicate to recreate checksum - do { + try_for(30, sub { @paths = $mogc->get_paths($key); - } while (scalar(@paths) == 1 and sleep(0.1)); + scalar(@paths) != 1; + }); is(scalar(@paths), 2, "replicate successfully with good checksum"); $info = $mogc->file_info($key); @@ -221,9 +220,10 @@ use Digest::MD5 qw/md5_hex/; syswrite($admin, "!want 1 replicate\n"); # disable replication ok(<$admin> =~ /Now desiring/ && <$admin> eq ".\r\n", "enabled replicate"); - do { + try_for(30, sub { @paths = $mogc->get_paths($key); - } while (scalar(@paths) == 1 && sleep(0.1)); + scalar(@paths) != 1; + }); is(scalar(@paths), 2, "replicate successfully with good checksum"); $info = $mogc->file_info($key); @@ -239,9 +239,10 @@ use Digest::MD5 qw/md5_hex/; is($info->{checksum}, "MISSING", "checksum is missing"); full_fsck($tmptrack); - do { + try_for(30, sub { $info = $mogc->file_info($key); - } while ($info->{checksum} eq "MISSING" && 
sleep(0.1)); + $info->{checksum} ne "MISSING"; + }); is($info->{checksum}, "MD5:".md5_hex("lazy"), 'checksum is set after fsck'); @fsck_log = $sto->fsck_log_rows; @@ -265,17 +266,20 @@ use Digest::MD5 qw/md5_hex/; full_fsck($tmptrack); - do { + try_for(30, sub { @fsck_log = $sto->fsck_log_rows; - } while (scalar(@fsck_log) == 0 && sleep(0.1)); + scalar(@fsck_log) != 0; + }); is(scalar(@fsck_log), 1, "fsck log has one row"); is($fsck_log[0]->{fid}, $info->{fid}, "fid matches in fsck log"); is($fsck_log[0]->{evcode}, "REPL", "repl for mismatched checksum logged"); - do { + try_for(30, sub { @paths = $mogc->get_paths($key); - } while (scalar(@paths) < 2 && sleep(0.1)); + scalar(@paths) >= 2; + }); + is(scalar(@paths), 2, "2 paths for key after replication"); is($ua->get($paths[0])->content, "lazy", "paths[0] is correct"); is($ua->get($paths[1])->content, "lazy", "paths[1] is correct"); @@ -304,9 +308,10 @@ use Digest::MD5 qw/md5_hex/; full_fsck($tmptrack); - do { + try_for(30, sub { @fsck_log = $sto->fsck_log_rows; - } while (scalar(@fsck_log) == 0 && sleep(0.1)); + scalar(@fsck_log) != 0; + }); is(scalar(@fsck_log), 1, "fsck log has one row"); is($fsck_log[0]->{fid}, $info->{fid}, "fid matches in fsck log"); @@ -329,19 +334,17 @@ use Digest::MD5 qw/md5_hex/; print $fh "HAZY"; ok(close($fh), "closed replacement file (lazycksum => HAZY)"); - while ($sto->get_checksum($info->{fid})) { - sleep(0.5); - print "waiting...\n"; - } + try_for(30, sub { ! 
$sto->get_checksum($info->{fid}); }); is($sto->get_checksum($info->{fid}), undef, "old checksum is gone"); } # completely corrupted files with no checksum row { my $key = 'lazycksum'; - do { + try_for(30, sub { @paths = $mogc->get_paths($key); - } while (scalar(@paths) < 2 && sleep(0.1)); + scalar(@paths) >= 2; + }); is(scalar(@paths), 2, "replicated succesfully"); my $info = $mogc->file_info($key); @@ -365,9 +368,10 @@ use Digest::MD5 qw/md5_hex/; full_fsck($tmptrack); - do { + try_for(30, sub { @fsck_log = $sto->fsck_log_rows; - } while (scalar(@fsck_log) == 0 && sleep(0.1)); + scalar(@fsck_log) != 0; + }); is(scalar(@fsck_log), 1, "fsck log has one row"); is($fsck_log[0]->{fid}, $info->{fid}, "fid matches in fsck log"); @@ -391,9 +395,11 @@ use Digest::MD5 qw/md5_hex/; ok($tmptrack->mogadm("settings", "set", "fsck_checksum", "MD5"), "enable fsck_checksum=MD5"); wait_for_monitor($be); full_fsck($tmptrack); - do { + + try_for(30, sub { @fsck_log = $sto->fsck_log_rows; - } while (scalar(@fsck_log) == 0 && sleep(0.1)); + scalar(@fsck_log) != 0; + }); is(scalar(@fsck_log), 1, "fsck log has one row"); is($fsck_log[0]->{fid}, $info->{fid}, "fid matches in fsck log"); is($fsck_log[0]->{evcode}, "MSUM", "MSUM logged"); @@ -421,11 +427,10 @@ use MogileFS::Config; is($settings->{fsck_checksum}, 'off', "fsck_checksum server setting visible"); full_fsck($tmptrack); my $nr; - foreach my $i (0..100) { + try_for(1000, sub { $nr = $sto->file_queue_length(FSCK_QUEUE); - last if ($nr eq '0'); - sleep 0.1; - } + $nr eq '0'; + }); is($nr, '0', "fsck finished"); @fsck_log = $sto->fsck_log_rows; is(scalar(@fsck_log), 0, "fsck log is empty with fsck_checksum=off"); @@ -440,9 +445,10 @@ use MogileFS::Config; ok(! 
defined($settings->{fsck_checksum}), "fsck_checksum=class server setting hidden (default)"); full_fsck($tmptrack); - do { + try_for(30, sub { @fsck_log = $sto->fsck_log_rows; - } while (scalar(@fsck_log) == 0 && sleep(0.1)); + scalar(@fsck_log) != 0; + }); is(scalar(@fsck_log), 1, "fsck log has one row"); is($fsck_log[0]->{fid}, $info->{fid}, "fid matches in fsck log"); From 9614260123d92894d29dbcd36af36e0adfa9d364 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 7 Nov 2012 00:26:30 +0000 Subject: [PATCH 301/405] t/50-checksum.t: ensure replicate worker is really down Workers do not receive nor respond to messages as soon as the ProcManager dispatches the request to stop/start them, so wait until ProcManager no longer knows about a process before proceeding. --- t/50-checksum.t | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/t/50-checksum.t b/t/50-checksum.t index b6719241..c9450b6f 100644 --- a/t/50-checksum.t +++ b/t/50-checksum.t @@ -68,6 +68,28 @@ sub wait_for_monitor { $be->{timeout} = $was; } +sub stop_replicate { + my ($admin) = @_; + syswrite($admin, "!want 0 replicate\r\n"); # disable replication + ok(<$admin> =~ /Now desiring/ && <$admin> eq ".\r\n", "disabling replicate"); + + my $count; + try_for(30, sub { + $count = -1; + syswrite($admin, "!jobs\r\n"); + MogileFS::Util::wait_for_readability(fileno($admin), 10); + while (1) { + my $line = <$admin>; + if ($line =~ /\Areplicate count (\d+)/) { + $count = $1; + } + last if $line eq ".\r\n"; + } + $count == 0; + }); + is($count, 0, "replicate count is zero"); +} + sub full_fsck { my $tmptrack = shift; @@ -143,8 +165,7 @@ use Digest::MD5 qw/md5_hex/; { my $key = 'savecksum'; - syswrite($admin, "!want 0 replicate\n"); # disable replication - ok(<$admin> =~ /Now desiring/ && <$admin> eq ".\r\n", "disabled replicate"); + stop_replicate($admin); %opts = ( domain => "testdom", class => "2copies", key => $key ); $rv = $be->do_request("create_open", \%opts); @@ 
-208,8 +229,7 @@ use Digest::MD5 qw/md5_hex/; { my $key = 'lazycksum'; - syswrite($admin, "!want 0 replicate\n"); # disable replication - ok(<$admin> =~ /Now desiring/ && <$admin> eq ".\r\n", "disabled replicate"); + stop_replicate($admin); my $fh = $mogc->new_file($key, "2copies"); print $fh "lazy"; From 9457434502017417fca22baa02d99920b10263e1 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 7 Nov 2012 21:35:09 +0000 Subject: [PATCH 302/405] checksum: avoid comparison on uninitialized value $class->{hashtype} is undef by default for classes where no checksums are configured. Since clients can force checksum verification in create_close regardless of class, we can end up with a Checksum object for FIDs regardless of which class the FID is in. --- lib/MogileFS/Checksum.pm | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Checksum.pm b/lib/MogileFS/Checksum.pm index 86b5ae31..a341304e 100644 --- a/lib/MogileFS/Checksum.pm +++ b/lib/MogileFS/Checksum.pm @@ -76,7 +76,9 @@ sub maybe_save { # $class may be undef as it could've been deleted between # create_open and create_close, we've never verified this before... - if ($class && $self->{hashtype} eq $class->{hashtype}) { + # class->{hashtype} is also undef, as we allow create_close callers + # to specify a hash regardless of class. + if ($class && defined($class->{hashtype}) && $self->{hashtype} eq $class->{hashtype}) { $self->save; } } From ff5a3da3f1c853ac89138ca9e1b597ec79d9f63f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 10 Nov 2012 04:17:40 +0000 Subject: [PATCH 303/405] query: allow "0" key on all commands which take keys delete, file_info, get_paths, rename, file_debug, updateclass were all broken when handling a key named "0". 
--- lib/MogileFS/Worker/Query.pm | 33 +++++++++++++++++++++++++-------- t/00-startup.t | 27 +++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 8 deletions(-) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index f88fdfee..a5fbccc4 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -346,6 +346,12 @@ sub sort_devs_by_freespace { return @list; } +sub valid_key { + my ($key) = @_; + + return defined($key) && length($key); +} + sub cmd_create_close { my MogileFS::Worker::Query $self = shift; my $args = shift; @@ -399,7 +405,7 @@ sub cmd_create_close { # if a temp file is closed without a provided-key, that means to # delete it. - unless (defined $key && length($key)) { + unless (valid_key($key)) { $failed->(); return $self->ok_line; } @@ -486,7 +492,8 @@ sub cmd_updateclass { or return $self->err_line('domain_not_found'); my $dmid = $args->{dmid}; - my $key = $args->{key} or return $self->err_line("no_key"); + my $key = $args->{key}; + valid_key($key) or return $self->err_line("no_key"); my $class = $args->{class} or return $self->err_line("no_class"); my $classid = eval { Mgd::class_factory()->get_by_name($dmid, $class)->id } @@ -521,7 +528,9 @@ sub cmd_delete { # validate parameters my $dmid = $args->{dmid}; - my $key = $args->{key} or return $self->err_line("no_key"); + my $key = $args->{key}; + + valid_key($key) or return $self->err_line("no_key"); # is this fid still owned by this key? my $fid = MogileFS::FID->new_from_dmid_and_key($dmid, $key) @@ -551,7 +560,7 @@ sub cmd_file_debug { # If not, require dmid/dkey and pick up the fid from there. 
$args->{dmid} = $self->check_domain($args) or return $self->err_line('domain_not_found'); - return $self->err_line("no_key") unless $args->{key}; + return $self->err_line("no_key") unless valid_key($args->{key}); # now invoke the plugin, abort if it tells us to my $rv = MogileFS::run_global_hook('cmd_file_debug', $args); @@ -625,7 +634,9 @@ sub cmd_file_info { # validate parameters my $dmid = $args->{dmid}; - my $key = $args->{key} or return $self->err_line("no_key"); + my $key = $args->{key}; + + valid_key($key) or return $self->err_line("no_key"); my $fid; Mgd::get_store()->slaves_ok(sub { @@ -745,7 +756,9 @@ sub cmd_rename { my $dmid = $self->check_domain($args) or return $self->err_line('domain_not_found'); my ($fkey, $tkey) = ($args->{from_key}, $args->{to_key}); - return $self->err_line("no_key") unless $fkey && $tkey; + unless (valid_key($fkey) && valid_key($tkey)) { + return $self->err_line("no_key"); + } my $fid = MogileFS::FID->new_from_dmid_and_key($dmid, $fkey) or return $self->err_line("unknown_key"); @@ -1081,7 +1094,9 @@ sub cmd_get_paths { # validate parameters my $dmid = $args->{dmid}; - my $key = $args->{key} or return $self->err_line("no_key"); + my $key = $args->{key}; + + valid_key($key) or return $self->err_line("no_key"); # We default to returning two possible paths. # but the client may ask for more if they want. 
@@ -1258,7 +1273,9 @@ sub cmd_edit_file { # validate parameters my $dmid = $args->{dmid}; - my $key = $args->{key} or return $self->err_line("no_key"); + my $key = $args->{key}; + + valid_key($key) or return $self->err_line("no_key"); # get DB handle my $fid; diff --git a/t/00-startup.t b/t/00-startup.t index 263a185f..ce8eaf9a 100644 --- a/t/00-startup.t +++ b/t/00-startup.t @@ -392,4 +392,31 @@ foreach my $t (qw(file file_on file_to_delete)) { is($info->{fid}, $opts->{fid}, "explicit fid is correctly set"); } +{ + my $fh = $mogc->new_file("0", "1copy"); + ok((print $fh "zero\n"), "wrote to file"); + ok(close($fh), "closed file"); + + my $info = $mogc->file_info("0"); + is("HASH", ref($info), "file_info returned a hash"); + is("0", $info->{key}, "key matches 0"); + is("1copy", $info->{class}, "class matches for 0 key"); + + my @paths = $mogc->get_paths("0"); + is(1, scalar(@paths), "path returned for 0"); + + $mogc->rename("0", "zero"); + is($info->{fid}, $mogc->file_info("zero")->{fid}, "rename from 0"); + $mogc->rename("zero", "0"); + is($info->{fid}, $mogc->file_info("0")->{fid}, "rename to 0"); + $mogc->update_class("0", "2copies"); + is("2copies", $mogc->file_info("0")->{class}, "class updated for 0 key"); + + my $debug = $mogc->file_debug(key => "0"); + is($debug->{fid_fid}, $info->{fid}, "FID from debug matches"); + is($debug->{fid_dkey}, "0", "key from debug matches"); + + ok($mogc->delete("0"), "delete 0 works"); +} + done_testing(); From 7851e441bb416ec5df73a68d19cdea4b61c80bde Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 10 Dec 2012 23:40:57 +0000 Subject: [PATCH 304/405] fsck: use replicate lock when fixing FID We need to ensure neither replicate (nor delete) are changing the devids list when fixing an FID. This should ensure we're safely modifying the devid list for a given FID when forgetting about bad ones. 
--- lib/MogileFS/Worker/Fsck.pm | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index 879df679..d90da19e 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -106,9 +106,21 @@ sub check_fid { my ($self, $fid) = @_; my $fix = sub { + # we cached devids without locking for the fast path, + # ensure we get an up-to-date list in the slow path. + $fid->forget_cached_devids; + + my $sto = Mgd::get_store(); + unless ($sto->should_begin_replicating_fidid($fid->id)) { + error("Fsck stalled for fid $fid: failed to acquire lock"); + return STALLED; + } + my $fixed = eval { $self->fix_fid($fid) }; + my $err = $@; + $sto->note_done_replicating($fid->id); if (! defined $fixed) { - error("Fsck stalled for fid $fid: $@"); + error("Fsck stalled for fid $fid: $err"); return STALLED; } $fid->fsck_log(EV_CANT_FIX) if ! $fixed; From a549b002e7090a1e318a1577470bd1aaec195400 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 11 Dec 2012 00:39:38 +0000 Subject: [PATCH 305/405] fsck: skip non-existent FIDs properly We should not waste time stat()-ing FIDs that no longer exist at all. --- lib/MogileFS/Worker/Fsck.pm | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index d90da19e..a1a1b654 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -59,11 +59,12 @@ sub work { my @fids = (); while (my $todo = shift @{$queue_todo}) { my $fid = MogileFS::FID->new($todo->{fid}); - unless ($fid->exists) { + if ($fid->exists) { + push(@fids, $fid); + } else { # FID stopped existing before being checked. 
$sto->delete_fid_from_file_to_queue($fid->id, FSCK_QUEUE); } - push(@fids, $fid); } return unless @fids; @@ -116,6 +117,13 @@ sub check_fid { return STALLED; } + unless ($fid->exists) { + # FID stopped existing while doing (or waiting on) + # the fast check, give up on this fid + $sto->note_done_replicating($fid->id); + return HANDLED; + } + my $fixed = eval { $self->fix_fid($fid) }; my $err = $@; $sto->note_done_replicating($fid->id); From 25e344a71c4f2244c3c7c471186060f2e4da24f6 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 13 Dec 2012 21:26:41 +0000 Subject: [PATCH 306/405] improve handling of classes which change hash algorithm In replicate, we validate via existing FID checksum regardless of class. This failed when the class.hashtype was altered after uploading but before replication. The following sequence of events caused replication to fail: 1. class.hashtype = MD5 2. FID created and stores MD5:... 3. FID enqueued for replication 4. class.hashtype = NONE 5. FID begins replicating Replication (Step 5) failed since the existing MD5 digest is trying to validate against a (now) non-existent class digest. Since we stored the checksum in the database anyways, calculate and validate anyways as an admin could've only wanted to alter a class temporarily, not permanently. An admin may also decide to switch checksum algorithms. 
Fsck now logs hash algorithm mismatches as "BALG" and emits a descriptive message to syslog --- MANIFEST | 1 + lib/MogileFS/Worker/Fsck.pm | 14 ++- lib/MogileFS/Worker/Replicate.pm | 8 +- t/51-checksum_class_change.t | 162 +++++++++++++++++++++++++++++++ 4 files changed, 180 insertions(+), 5 deletions(-) create mode 100644 t/51-checksum_class_change.t diff --git a/MANIFEST b/MANIFEST index eb4cb987..2a5a61fb 100644 --- a/MANIFEST +++ b/MANIFEST @@ -75,6 +75,7 @@ t/20-filepaths.t t/30-rebalance.t t/40-httpfile.t t/50-checksum.t +t/51-checksum_class_change.t t/60-fsck.t t/70-reaper.t t/checksum.t diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index a1a1b654..1c0b9eba 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -29,6 +29,7 @@ use constant EV_BAD_COUNT => "BCNT"; use constant EV_BAD_CHECKSUM => "BSUM"; use constant EV_NO_CHECKSUM => "NSUM"; use constant EV_MULTI_CHECKSUM => "MSUM"; +use constant EV_BAD_HASHTYPE => "BALG"; use POSIX (); @@ -333,7 +334,7 @@ sub fix_fid { # in case the devcount or similar was fixed. $fid->want_reload; - $self->fix_checksums($fid, $checksums) if $alg && $alg ne "off"; + $self->fix_checksums($fid, $alg, $checksums) if $alg && $alg ne "off"; # Note: this will reload devids, if they called 'note_on_device' # or 'forget_about_device' @@ -403,7 +404,7 @@ sub all_checksums_bad { } sub fix_checksums { - my ($self, $fid, $checksums) = @_; + my ($self, $fid, $alg, $checksums) = @_; my $cur_checksum = $fid->checksum; my @all_checksums = keys(%$checksums); @@ -411,7 +412,14 @@ sub fix_checksums { my $disk_checksum = $all_checksums[0]; if ($cur_checksum) { if ($cur_checksum->{checksum} ne $disk_checksum) { - $fid->fsck_log(EV_BAD_CHECKSUM); + my $expect = $cur_checksum->info; + my $actual = "$alg:" . 
unpack("H*", $disk_checksum); + error("$cur_checksum does not match disk: $actual"); + if ($alg ne $cur_checksum->hashname) { + $fid->fsck_log(EV_BAD_HASHTYPE); + } else { + $fid->fsck_log(EV_BAD_CHECKSUM); + } } } else { # fresh row to checksum my $hashtype = $fid->class->hashtype; diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index 247e07a5..5806a6b6 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -469,7 +469,11 @@ sub replicate { } my $worker = MogileFS::ProcManager->is_child or die; - my $digest = Digest->new($cls->hashname) if $cls->hashtype; + my $digest; + my $fid_checksum = $fid->checksum; + $digest = Digest->new($fid_checksum->hashname) if $fid_checksum; + $digest ||= Digest->new($cls->hashname) if $cls->hashtype; + my $rv = http_copy( sdevid => $sdevid, ddevid => $ddevid, @@ -698,7 +702,7 @@ sub http_copy { if ($line =~ m!^HTTP/\d+\.\d+\s+(\d+)!) { if ($1 >= 200 && $1 <= 299) { if ($digest) { - my $alg = $rfid->class->hashname; + my $alg = ($fid_checksum && $fid_checksum->hashname) || $rfid->class->hashname; if ($ddev->{reject_bad_md5} && ($alg eq "MD5")) { # dest device would've rejected us with a error, diff --git a/t/51-checksum_class_change.t b/t/51-checksum_class_change.t new file mode 100644 index 00000000..27f4b161 --- /dev/null +++ b/t/51-checksum_class_change.t @@ -0,0 +1,162 @@ +# -*-perl-*- +use strict; +use warnings; +use Test::More; +use FindBin qw($Bin); +use MogileFS::Server; +use MogileFS::Test; +use HTTP::Request; +find_mogclient_or_skip(); +use MogileFS::Admin; + +my $sto = eval { temp_store(); }; +if (!$sto) { + plan skip_all => "Can't create temporary test database: $@"; + exit 0; +} + +use File::Temp; +my %mogroot; +$mogroot{1} = File::Temp::tempdir( CLEANUP => 1 ); +$mogroot{2} = File::Temp::tempdir( CLEANUP => 1 ); +my $dev2host = { 1 => 1, 2 => 2, }; +foreach (sort { $a <=> $b } keys %$dev2host) { + my $root = $mogroot{$dev2host->{$_}}; + 
mkdir("$root/dev$_") or die "Failed to create dev$_ dir: $!"; +} + +my $ms1 = create_mogstored("127.0.1.1", $mogroot{1}); +ok($ms1, "got mogstored1"); +my $ms2 = create_mogstored("127.0.1.2", $mogroot{2}); +ok($ms1, "got mogstored2"); + +try_for(30, sub { + print "Waiting on usage...\n"; + -e "$mogroot{1}/dev1/usage" && -e "$mogroot{2}/dev2/usage"; +}); + +my $tmptrack = create_temp_tracker($sto); +ok($tmptrack); + +my $admin = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001'); +$admin or die "failed to create admin socket: $!"; +my $moga = MogileFS::Admin->new(hosts => [ "127.0.0.1:7001" ]); +my $mogc = MogileFS::Client->new( + domain => "testdom", + hosts => [ "127.0.0.1:7001" ], + ); +my $be = $mogc->{backend}; # gross, reaching inside of MogileFS::Client + +# test some basic commands to backend +ok($tmptrack->mogadm("domain", "add", "testdom"), "created test domain"); +ok($tmptrack->mogadm("class", "add", "testdom", "changer", "--mindevcount=2", "--hashtype=MD5"), "created changer class in testdom with hashtype=MD5"); + +ok($tmptrack->mogadm("host", "add", "hostA", "--ip=127.0.1.1", "--status=alive"), "created hostA"); +ok($tmptrack->mogadm("host", "add", "hostB", "--ip=127.0.1.2", "--status=alive"), "created hostB"); + +ok($tmptrack->mogadm("device", "add", "hostA", 1), "created dev1 on hostA"); +ok($tmptrack->mogadm("device", "add", "hostB", 2), "created dev2 on hostB"); + +sub wait_for_monitor { + my $be = shift; + my $was = $be->{timeout}; # can't use local on phash :( + $be->{timeout} = 10; + ok($be->do_request("clear_cache", {}), "waited for monitor") + or die "Failed to wait for monitor"; + ok($be->do_request("clear_cache", {}), "waited for monitor") + or die "Failed to wait for monitor"; + $be->{timeout} = $was; +} + +sub stop_replicate { + my ($admin) = @_; + syswrite($admin, "!want 0 replicate\r\n"); # disable replication + ok(<$admin> =~ /Now desiring/ && <$admin> eq ".\r\n", "disabling replicate"); + + my $count; + try_for(30, sub { + $count = -1; 
+ syswrite($admin, "!jobs\r\n"); + MogileFS::Util::wait_for_readability(fileno($admin), 10); + while (1) { + my $line = <$admin>; + if ($line =~ /\Areplicate count (\d+)/) { + $count = $1; + } + last if $line eq ".\r\n"; + } + $count == 0; + }); + is($count, 0, "replicate count is zero"); +} + +wait_for_monitor($be); +stop_replicate($admin); + +my ($req, $rv, %opts, @paths, @fsck_log); +my $ua = LWP::UserAgent->new; + +use Data::Dumper; +use Digest::MD5 qw/md5_hex/; + +my $key = "foo"; +{ + %opts = ( domain => "testdom", class => "changer", key => $key ); + $rv = $be->do_request("create_open", \%opts); + %opts = %$rv; + ok($rv && $rv->{path}, "create_open succeeded"); + $req = HTTP::Request->new(PUT => $rv->{path}); + $req->content("blah"); + $rv = $ua->request($req); + ok($rv->is_success, "PUT successful"); + $opts{key} = $key; + $opts{domain} = "testdom"; + $opts{checksum} = "MD5:".md5_hex('blah'); + $opts{checksumverify} = 1; + $rv = $be->do_request("create_close", \%opts); + ok($rv, "checksum verified successfully"); + ok($sto->get_checksum($opts{fid}), "checksum saved"); + ok($mogc->file_info($key), "file_info($key) is sane"); +} + +# disable MD5 checksums in "changer" class +{ + %opts = ( domain => "testdom", class => "changer", + hashtype => "NONE", mindevcount => 2); + ok($be->do_request("update_class", \%opts), "update class"); + wait_for_monitor($be); +} + +# replicate should work even if we have, but don't need a checksum anymore +{ + syswrite($admin, "!want 1 replicate\n"); + ok(<$admin> =~ /Now desiring/ && <$admin> eq ".\r\n", "enabled replicate"); + + # wait for replicate to recreate checksum + try_for(30, sub { + @paths = $mogc->get_paths($key); + scalar(@paths) != 1; + }); + is(scalar(@paths), 2, "replicated successfully"); + stop_replicate($admin); +} + +# switch to SHA-1 checksums in "changer" class +{ + %opts = ( domain => "testdom", class => "changer", + hashtype => "SHA-1", mindevcount => 2); + ok($be->do_request("update_class", \%opts), 
"update class"); + wait_for_monitor($be); +} + +{ + ok($tmptrack->mogadm("fsck", "stop"), "stop fsck"); + ok($tmptrack->mogadm("fsck", "clearlog"), "clear fsck log"); + ok($tmptrack->mogadm("fsck", "reset"), "reset fsck"); + ok($tmptrack->mogadm("fsck", "start"), "started fsck"); + + try_for(30, sub { @fsck_log = $sto->fsck_log_rows; }); + is($fsck_log[0]->{evcode}, "BALG", "bad checksum algorithm logged"); +} + +done_testing(); From 811c621ff194416de5141e4db37e8ae4818a9539 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 18 Dec 2012 00:12:04 +0000 Subject: [PATCH 307/405] reaper: validate DB connection before reaping This helps prevent the reaper process from dying if the DB disconnected us for idleness. This should fix #75: ("reaper dies if DB connection closes") http://code.google.com/p/mogilefs/issues/detail?id=75 --- lib/MogileFS/Worker/Reaper.pm | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/MogileFS/Worker/Reaper.pm b/lib/MogileFS/Worker/Reaper.pm index 3f0866d1..21c22696 100644 --- a/lib/MogileFS/Worker/Reaper.pm +++ b/lib/MogileFS/Worker/Reaper.pm @@ -73,6 +73,14 @@ sub reaper_inject_limit { # forever. 
$delay is the current delay we were scheduled at sub reap_dev { my ($self, $devid, $delay) = @_; + + # ensure the master DB is up, retry in REAP_INTERVAL if down + unless ($self->validate_dbh) { + $delay = REAP_INTERVAL; + Danga::Socket->AddTimer($delay, sub { $self->reap_dev($devid, $delay) }); + return; + } + my $limit = $self->reaper_inject_limit; # just in case a user mistakenly nuked a devid from the device table: From f79d69eff8c7ead5ad9069b2e010d22c84fd0ab7 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 6 Jan 2013 08:39:15 +0000 Subject: [PATCH 308/405] t/30-rebalance: remove redundant try_for() function I missed this when moving try_for() into Test.pm --- t/30-rebalance.t | 9 --------- 1 file changed, 9 deletions(-) diff --git a/t/30-rebalance.t b/t/30-rebalance.t index 8219269a..c6b6d990 100644 --- a/t/30-rebalance.t +++ b/t/30-rebalance.t @@ -267,13 +267,4 @@ sleep 3; # - fiddle mbused/mbfree for devices and test the percentages # - test move limits (count, size, etc) -sub try_for { - my ($tries, $code) = @_; - for (1..$tries) { - return 1 if $code->(); - sleep 1; - } - return 0; -} - done_testing(); From d9da3b22f267ef24afc3b488457f2bf083b1ce3f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 6 Jan 2013 08:47:48 +0000 Subject: [PATCH 309/405] t/00-startup: fix updateclass test We called MogileFS::Client::update_class incorrectly without the key. Additionally, the test for checking the number of copies was also incorrect. 
--- t/00-startup.t | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/00-startup.t b/t/00-startup.t index ce8eaf9a..1f2d2781 100644 --- a/t/00-startup.t +++ b/t/00-startup.t @@ -239,13 +239,13 @@ for (1..10) { is scalar($mogc->get_paths("file1copy")), 1, 'File is on 1 device'; - $mogc->update_class('2copies'); + ok($mogc->update_class('file1copy', '2copies'), "updated class to 2 copies"); # wait for it to replicate ok(try_for(10, sub { my @urls = $mogc->get_paths("file1copy"); my $nloc = @urls; - if ($nloc < 1) { + if ($nloc < 2) { diag("no_content still only on $nloc devices"); return 0; } From affc0654af6eaae1d771a9e05d0ffe4f93061ea2 Mon Sep 17 00:00:00 2001 From: Daniel Frett Date: Sat, 1 Dec 2012 11:42:04 -0500 Subject: [PATCH 310/405] support updating the class to the default class which has an id of 0 [ew: added trivial test] Signed-off-by: Eric Wong --- lib/MogileFS/Worker/Query.pm | 3 ++- t/00-startup.t | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index a5fbccc4..c437a3a2 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -496,8 +496,9 @@ sub cmd_updateclass { valid_key($key) or return $self->err_line("no_key"); my $class = $args->{class} or return $self->err_line("no_class"); - my $classid = eval { Mgd::class_factory()->get_by_name($dmid, $class)->id } + my $classobj = Mgd::class_factory()->get_by_name($dmid, $class) or return $self->err_line('class_not_found'); + my $classid = $classobj->id; my $fid = MogileFS::FID->new_from_dmid_and_key($dmid, $key) or return $self->err_line('invalid_key'); diff --git a/t/00-startup.t b/t/00-startup.t index 1f2d2781..c532b83d 100644 --- a/t/00-startup.t +++ b/t/00-startup.t @@ -252,6 +252,8 @@ for (1..10) { return 1; }), "replicated to 2 paths"); + ok($mogc->update_class('file1copy', 'default'), "updated class to default"); + ok($mogc->delete("file1copy"), "deleted updateclass testfile 
file1copy") or die "Error: " . $mogc->errstr; } From 79e1c5b7ef53f922de215b64da8756e9c3d6fee6 Mon Sep 17 00:00:00 2001 From: Daniel Frett Date: Sat, 1 Dec 2012 03:45:54 -0500 Subject: [PATCH 311/405] add a hook to cmd_updateclass Signed-off-by: Eric Wong --- lib/MogileFS/Worker/Query.pm | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index c437a3a2..9fc8c6e2 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -491,6 +491,10 @@ sub cmd_updateclass { $args->{dmid} = $self->check_domain($args) or return $self->err_line('domain_not_found'); + # call out to a hook that might modify the arguments for us, abort if it tells us to + my $rv = MogileFS::run_global_hook('cmd_updateclass', $args); + return $self->err_line('plugin_aborted') if defined $rv && ! $rv; + my $dmid = $args->{dmid}; my $key = $args->{key}; valid_key($key) or return $self->err_line("no_key"); From 37ab849a4cb75db0a2492cad078efa73b495c594 Mon Sep 17 00:00:00 2001 From: dormando Date: Sun, 6 Jan 2013 19:22:15 -0800 Subject: [PATCH 312/405] Checking in changes prior to tagging of version 2.66. Changelog diff is: diff --git a/CHANGES b/CHANGES index 64455f4..bd7e38a 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,29 @@ +2013-01-06: Release version 2.66 + + * add a hook to cmd_updateclass (Daniel Frett ) + + * support updating the class to the default class which has an id of 0 (Daniel Frett ) + + * reaper: validate DB connection before reaping (Eric Wong ) + Fixes occasional crash in reaper process. + + * improve handling of classes which change hash algorithm (Eric Wong ) + + * fsck: skip non-existent FIDs properly (Eric Wong ) + + * fsck: use replicate lock when fixing FID (Eric Wong ) + + * query: allow "0" key on all commands which take keys (Eric Wong ) + + * prevent reqid mismatches (and queryworker death) (Eric Wong ) + Fixes crash case with specific error types. 
+ + * fix use_dest_devs for rebalance (Pyry Hakulinen ) + Fixes "use_dest_devs" argument during rebalance. + + * Fix "skip_devcount" during rebalance (Pyry Hakulinen ) + Now actually skips updating devcount column during rebalance. + 2012-08-13: Release version 2.65 * Postgres advisory lock instead of table-based lock (Robin H. Johnson ) --- CHANGES | 26 ++++++++++++++++++++++++++ lib/MogileFS/Server.pm | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 64455f42..bd7e38a6 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,29 @@ +2013-01-06: Release version 2.66 + + * add a hook to cmd_updateclass (Daniel Frett ) + + * support updating the class to the default class which has an id of 0 (Daniel Frett ) + + * reaper: validate DB connection before reaping (Eric Wong ) + Fixes occasional crash in reaper process. + + * improve handling of classes which change hash algorithm (Eric Wong ) + + * fsck: skip non-existent FIDs properly (Eric Wong ) + + * fsck: use replicate lock when fixing FID (Eric Wong ) + + * query: allow "0" key on all commands which take keys (Eric Wong ) + + * prevent reqid mismatches (and queryworker death) (Eric Wong ) + Fixes crash case with specific error types. + + * fix use_dest_devs for rebalance (Pyry Hakulinen ) + Fixes "use_dest_devs" argument during rebalance. + + * Fix "skip_devcount" during rebalance (Pyry Hakulinen ) + Now actually skips updating devcount column during rebalance. + 2012-08-13: Release version 2.65 * Postgres advisory lock instead of table-based lock (Robin H. 
Johnson ) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 6d0d4c8f..044119c7 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.65"; +$VERSION = "2.66"; =head1 NAME From b6568751f2e9ec64253e5aa6e10e071185eb4c1b Mon Sep 17 00:00:00 2001 From: Dave Lambley Date: Tue, 8 Jan 2013 10:46:02 +0000 Subject: [PATCH 313/405] Reseed the random number generator after forking. Signed-off-by: Eric Wong --- lib/MogileFS/ProcManager.pm | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/MogileFS/ProcManager.pm b/lib/MogileFS/ProcManager.pm index 34228fb4..d2acb62f 100644 --- a/lib/MogileFS/ProcManager.pm +++ b/lib/MogileFS/ProcManager.pm @@ -236,6 +236,9 @@ sub make_new_child { return $worker_conn; } + # let children have different random number seeds + srand(); + # as a child, we want to close these and ignore them $_->() foreach @prefork_cleanup; close($parents_ipc); From 70c8d58eb2b54dcab7c02634b56a74bffd6ba9fd Mon Sep 17 00:00:00 2001 From: Dave Lambley Date: Tue, 8 Jan 2013 16:09:59 +0000 Subject: [PATCH 314/405] Pull out device sorting into its own method for overriding. Signed-off-by: Eric Wong --- .../ReplicationPolicy/MultipleHosts.pm | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/lib/MogileFS/ReplicationPolicy/MultipleHosts.pm b/lib/MogileFS/ReplicationPolicy/MultipleHosts.pm index 84a569d3..1c9b5157 100644 --- a/lib/MogileFS/ReplicationPolicy/MultipleHosts.pm +++ b/lib/MogileFS/ReplicationPolicy/MultipleHosts.pm @@ -97,12 +97,8 @@ sub replicate_to { return TEMP_NO_ANSWER if $already_on >= $min && @ideal == 0; - # Do this little dance to only weight-shuffle the top end of empty devices - # to save CPU. 
- @ideal = weighted_list(map { [$_, 100 * $_->percent_free] } - splice(@ideal, 0, 20)); - @desp = weighted_list(map { [$_, 100 * $_->percent_free] } - splice(@desp, 0, 20)); + @ideal = $self->sort_devices(\@ideal, $fid); + @desp = $self->sort_devices(\@desp, $fid); return MogileFS::ReplicationRequest->new( ideal => \@ideal, @@ -121,6 +117,16 @@ sub unique_hosts { return scalar keys %host; } +sub sort_devices { + my ($self, $devs) = @_; + + # Do this little dance to only weight-shuffle the top end of empty devices + # to save CPU. + + return weighted_list(map { [$_, 100 * $_->percent_free] } + splice(@{ $devs }, 0, 20)); +} + 1; # Local Variables: From 818f8022be6f54f74970f9c1c9f3005e1b273cd8 Mon Sep 17 00:00:00 2001 From: Dave Lambley Date: Tue, 8 Jan 2013 16:20:46 +0000 Subject: [PATCH 315/405] Do both sorts in one method, to save on shared initialisation. Signed-off-by: Eric Wong --- lib/MogileFS/ReplicationPolicy/MultipleHosts.pm | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/MogileFS/ReplicationPolicy/MultipleHosts.pm b/lib/MogileFS/ReplicationPolicy/MultipleHosts.pm index 1c9b5157..946be0d6 100644 --- a/lib/MogileFS/ReplicationPolicy/MultipleHosts.pm +++ b/lib/MogileFS/ReplicationPolicy/MultipleHosts.pm @@ -97,8 +97,7 @@ sub replicate_to { return TEMP_NO_ANSWER if $already_on >= $min && @ideal == 0; - @ideal = $self->sort_devices(\@ideal, $fid); - @desp = $self->sort_devices(\@desp, $fid); + $self->sort_devices(\@ideal, \@desp, $fid); return MogileFS::ReplicationRequest->new( ideal => \@ideal, @@ -118,13 +117,18 @@ sub unique_hosts { } sub sort_devices { - my ($self, $devs) = @_; + my ($self, $ideal, $desp, $fid) = @_; # Do this little dance to only weight-shuffle the top end of empty devices # to save CPU. 
- return weighted_list(map { [$_, 100 * $_->percent_free] } - splice(@{ $devs }, 0, 20)); + @{ $ideal } = weighted_list(map { [$_, 100 * $_->percent_free] } + splice(@{ $ideal }, 0, 20)); + + @{ $desp } = weighted_list(map { [$_, 100 * $_->percent_free] } + splice(@{ $desp }, 0, 20)); + + return; } 1; From 92046c128ed0ca48381e836196d9f65e69680478 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 6 Jan 2012 04:18:15 +0000 Subject: [PATCH 316/405] worker: set monitor_has_run flag at initialization This way the queryworker will know it won't have to wait again for the monitor to run. This allows users to (manually) set higher intervals in Monitor.pm without noticing ill effects. --- lib/MogileFS/Worker.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker.pm b/lib/MogileFS/Worker.pm index a4e77bdc..d077df01 100644 --- a/lib/MogileFS/Worker.pm +++ b/lib/MogileFS/Worker.pm @@ -26,7 +26,7 @@ sub new { $self->{psock} = $psock; $self->{readbuf} = ''; $self->{last_bcast_state} = {}; - $self->{monitor_has_run} = 0; + $self->{monitor_has_run} = MogileFS::ProcManager->is_monitor_good; $self->{last_ping} = 0; $self->{last_wake} = {}; $self->{queue_depth} = {}; From decedfedd283795e7dc3514170d343cb08b2c769 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 4 Sep 2012 23:21:40 +0000 Subject: [PATCH 317/405] monitor: remove dead iostats code/comments * set_observed_utilization() is a no-op, and can be safely removed. * Looking up the device via factory does not incur DB hit since the factory changes of May 2011. * Really avoids propagating invalid devids with correct ordering of the hash assignment. This prevents an invalid devid from hitting even the {devutil}->{cur} hash which lasts the lifetime of the monitor process. 
--- lib/MogileFS/Device.pm | 5 ----- lib/MogileFS/Worker/Monitor.pm | 5 +---- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/lib/MogileFS/Device.pm b/lib/MogileFS/Device.pm index f1a7bc17..4a253227 100644 --- a/lib/MogileFS/Device.pm +++ b/lib/MogileFS/Device.pm @@ -299,11 +299,6 @@ sub vivify_directories { $self->create_directory("/dev$devid/$p1/$p2/$p3"); } -# FIXME: Remove this once vestigial code is removed. -sub set_observed_utilization { - return 1; -} - # Compatibility interface since this old routine is unfortunately called # internally within plugins. This data should be passed into any hooks which # may need it? diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index fabe0218..7a547096 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -127,12 +127,9 @@ sub work { while (my ($devid, $util) = each %$stats) { # Lets not propagate devices that we accidentally find. - # This does hit the DB every time a device does not exist, so - # perhaps should add negative caching in the future. - $self->{devutil}->{cur}->{$devid} = $util; my $dev = Mgd::device_factory()->get_by_id($devid); next unless $dev; - $dev->set_observed_utilization($util); + $self->{devutil}->{cur}->{$devid} = $util; } }); From ca46b42404eccfaed9b8e7833d9f5e82035a1eba Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 10 Sep 2012 17:21:52 -0700 Subject: [PATCH 318/405] ProcManager: favor using recently-used queryworkers As HTTP/1.1 servers tend to disconnect idle connections over time, recently-used queryworkers are more likely to have reusable HTTP connections. This can reduce the number of open HTTP sockets across the cluster during non-peak periods. This may improve performance in two ways: * recently-used worker processes should have better memory locality * can avoid the chance for TCP slow-start-after-idle behavior to kick in for DB connections. 
The downside of this patch is memory/CPU usage between workers may appear lopsided and probably confuse users. This change should also make potential memory leaks more noticeable. --- lib/MogileFS/ProcManager.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/ProcManager.pm b/lib/MogileFS/ProcManager.pm index d2acb62f..25d8259e 100644 --- a/lib/MogileFS/ProcManager.pm +++ b/lib/MogileFS/ProcManager.pm @@ -557,7 +557,7 @@ sub ProcessQueues { next unless $clref; # get worker and make sure it's not closed already - my MogileFS::Connection::Worker $worker = shift @IdleQueryWorkers; + my MogileFS::Connection::Worker $worker = pop @IdleQueryWorkers; if (!defined $worker || $worker->{closed}) { unshift @PendingQueries, $clref; next; From ee5f196ae0b11e893f880cc5df8699290120797f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 29 Sep 2012 07:31:35 +0000 Subject: [PATCH 319/405] disable Nagle's algorithm for accepted clients Normally, disabling Nagle's algorithm would have little effect on typical MogileFS traffic: < read one request from client - process request in queryworker > write one response to client < read one request from client - process request in queryworker > write one response to client < read one request from client - process request in queryworker > write one response to client ... However, in certain cases, clients may pipeline requests (and sort responses on the client side). This causes tracker traffic to end up like this: < read multiple requests from client - process requests in parallel on multiple queryworkers > write one response to client > write one response to client > write one response to client ... Since Nagle's algorithm waits for an ACK from each response the server writes before sending the next response, it limits the rate at which the client can receive responses. 
Informal testing over loopback running the "file_info" command on two batches of 1000 keys each (2000 keys total) consistently reveals a small, ~20-60ms reduction (580-600ms -> 540-580ms) on a somewhat active machine with four queryworkers (and four cores). Like SO_KEEPALIVE, TCP_NODELAY is inherited from the listener by accepted sockets in every system I've checked, so there's no additional overhead in userspace when accepting new clients. --- lib/MogileFS/Server.pm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 044119c7..fa236020 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -27,7 +27,7 @@ use Time::HiRes (); use Net::Netmask; use LWP::UserAgent; use List::Util; -use Socket qw(SO_KEEPALIVE); +use Socket qw(SO_KEEPALIVE IPPROTO_TCP TCP_NODELAY); use MogileFS::Util qw(daemonize); use MogileFS::Config; @@ -130,6 +130,7 @@ sub run { Listen => 1024 ) or die "Error creating socket: $@\n"; $server->sockopt(SO_KEEPALIVE, 1); + $server->setsockopt(IPPROTO_TCP, TCP_NODELAY, 1); # save sub to accept a client push @servers, $server; From 86129e5d8c816098deca485662a55163afe6a761 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 20 Oct 2012 00:35:54 +0000 Subject: [PATCH 320/405] sqlite: use immediate transactions to prevent busy errors The default (deferred) transaction mode in SQLite delays locking, potentially leading to "database is locked" errors on concurrent access. Immediate transactions lock the database immediately, preventing unnecessary errors at the cost of reduced concurrency. I've still occasionally encountered a "database is locked" or two on my SQLite deployment with many workers over the months. Tested on MySQL, Postgres, and DBD::SQLite 1.29 and 1.37. This feature appeared in DBD::SQLite 1.30, but the extra attribute for DBI->connect is harmless for drivers which do not support this attribute. 
ref: http://search.cpan.org/dist/DBD-SQLite/lib/DBD/SQLite.pm Using the following instrumentation patch, I have not hit busy/locked errors while putting my SQLite-based MogileFS instance through heavy activity (fsck, uploads, deletes): --- a/lib/MogileFS/Store/SQLite.pm +++ b/lib/MogileFS/Store/SQLite.pm @@ -164,7 +164,12 @@ use constant SQLITE_LOCKED => 6; # A table in the database is locked sub was_deadlock_error { my $err = $_[0]->dbh->err or return 0; - ($err == SQLITE_BUSY || $err == SQLITE_LOCKED); + if ($err == SQLITE_BUSY || $err == SQLITE_LOCKED) { + Mgd::log('info', "DB locked"); + 1; + } else { + 0; + } } sub was_duplicate_error { --- lib/MogileFS/Store.pm | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 1ac1537c..0cfada9b 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -370,6 +370,7 @@ sub dbh { AutoCommit => 1, # FUTURE: will default to on (have to validate all callers first): RaiseError => ($self->{raise_errors} || 0), + sqlite_use_immediate_transaction => 1, }); }; alarm(0); From b05bced2f1b3004bf3dad4031a5a5c20c0d4020f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 12 Jan 2013 00:38:37 +0000 Subject: [PATCH 321/405] nginx: additional version check for uwsgi and scgi Debian squeeze (stable as of 2013/01) uses nginx 0.7.67, so there are likely many users still using this older version. Attempting to specify a dummy {uwsgi,scgi}_temp_path causes errors at startup for me. According to the the nginx CHANGES file, uwsgi appeared in 0.8.40 and scgi appeared in 0.8.42. 
ref: http://nginx.org/en/CHANGES --- lib/Mogstored/HTTPServer/Nginx.pm | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/lib/Mogstored/HTTPServer/Nginx.pm b/lib/Mogstored/HTTPServer/Nginx.pm index 29642d3d..79fb675f 100644 --- a/lib/Mogstored/HTTPServer/Nginx.pm +++ b/lib/Mogstored/HTTPServer/Nginx.pm @@ -4,6 +4,12 @@ use strict; use base 'Mogstored::HTTPServer'; use File::Temp (); +# returns a version number suitable for numeric comparison +sub ngx_version { + my ($major, $minor, $point) = @_; + return ($major << 16) + ($minor << 8) + $point; +} + sub start { my $self = shift; my $exe = $self->{bin}; @@ -104,11 +110,21 @@ sub start { unless($nginxMeta =~ /--without-http_proxy_module/sog) { $tempPath .= "proxy_temp_path $tmpDir/proxy_temp;\n"; } - unless($nginxMeta =~ /--without-http_uwsgi_module/sog) { - $tempPath .= "uwsgi_temp_path $tmpDir/uwsgi_temp;\n"; + + # Debian squeeze (stable as of 2013/01) is only on nginx 0.7.67 + + # uwsgi support appeared in nginx 0.8.40 + if (ngx_version(@version) >= ngx_version(0, 8, 40)) { + unless($nginxMeta =~ /--without-http_uwsgi_module/sog) { + $tempPath .= "uwsgi_temp_path $tmpDir/uwsgi_temp;\n"; + } } - unless($nginxMeta =~ /--without-http_scgi_module/sog) { - $tempPath .= "scgi_temp_path $tmpDir/scgi_temp;\n"; + + # scgi support appeared in nginx 0.8.42 + if (ngx_version(@version) >= ngx_version(0, 8, 42)) { + unless($nginxMeta =~ /--without-http_scgi_module/sog) { + $tempPath .= "scgi_temp_path $tmpDir/scgi_temp;\n"; + } } my ($user) = $> == 1 ? 
"user root root;" : ""; From 80c09e230a52ff2161f35f96a2ba6780eebc4d3c Mon Sep 17 00:00:00 2001 From: Daniel Frett Date: Fri, 11 Jan 2013 20:52:48 -0500 Subject: [PATCH 322/405] use the ngx_version function for determining non-daemon support also, only calculate the actual version once --- lib/Mogstored/HTTPServer/Nginx.pm | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/Mogstored/HTTPServer/Nginx.pm b/lib/Mogstored/HTTPServer/Nginx.pm index 79fb675f..65e597d5 100644 --- a/lib/Mogstored/HTTPServer/Nginx.pm +++ b/lib/Mogstored/HTTPServer/Nginx.pm @@ -33,14 +33,14 @@ sub start { # get meta-data about nginx binary my $nginxMeta = `$exe -V 2>&1`; - my @version = (0,0,0); + my $ngxVersion = ngx_version(0,0,0); if($nginxMeta =~ /nginx\/(\d+)\.(\d+)\.(\d+)/sog) { - @version = ($1,$2,$3); + $ngxVersion = ngx_version($1,$2,$3); } # determine if nginx can be run in non-daemon mode, supported in $version >= 1.0.9 (non-daemon provides better shutdown/crash support) # See: http://nginx.org/en/docs/faq/daemon_master_process_off.html - my $nondaemon = $version[0] > 1 || ($version[0] == 1 && $version[1] > 0) || ($version[0] == 1 && $version[1] == 0 && $version[2] >= 9); + my $nondaemon = $ngxVersion >= ngx_version(1, 0, 9); # create tmp directory my $tmpDir = $self->{docroot} . 
'/.tmp'; @@ -114,14 +114,14 @@ sub start { # Debian squeeze (stable as of 2013/01) is only on nginx 0.7.67 # uwsgi support appeared in nginx 0.8.40 - if (ngx_version(@version) >= ngx_version(0, 8, 40)) { + if ($ngxVersion >= ngx_version(0, 8, 40)) { unless($nginxMeta =~ /--without-http_uwsgi_module/sog) { $tempPath .= "uwsgi_temp_path $tmpDir/uwsgi_temp;\n"; } } # scgi support appeared in nginx 0.8.42 - if (ngx_version(@version) >= ngx_version(0, 8, 42)) { + if ($ngxVersion >= ngx_version(0, 8, 42)) { unless($nginxMeta =~ /--without-http_scgi_module/sog) { $tempPath .= "scgi_temp_path $tmpDir/scgi_temp;\n"; } From c8942c0783cc3457ad727017493eb2b19d97c9e7 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 21 Nov 2012 04:16:47 +0000 Subject: [PATCH 323/405] move checksum and tempfile delete to delete worker This removes two DB calls from the latency-critical queryworker process. This may widen a race condition with reused explicit FIDs, but explicit FIDs are a bad idea anyways and reusing FIDs likely had problems before this change. I've also removed the Postgres-specific delete_fidid() function. commit 7dbfb44d4f443bc5a9c30d772f3842f564efb714 (Make postgres use new delete worker code) removed the Postgres-specific code path and made it functionally identical to the generic version. 
--- lib/MogileFS/Store.pm | 16 ++++++++++++---- lib/MogileFS/Store/Postgres.pm | 12 ------------ lib/MogileFS/Worker/Delete.pm | 2 ++ 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 0cfada9b..55910c07 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1240,18 +1240,26 @@ sub delete_class { $self->condthrow; } +# called from a queryworker process, will trigger delete_fidid_enqueued +# in the delete worker sub delete_fidid { my ($self, $fidid) = @_; - eval { $self->delete_checksum($fidid); }; - $self->condthrow; eval { $self->dbh->do("DELETE FROM file WHERE fid=?", undef, $fidid); }; $self->condthrow; - eval { $self->dbh->do("DELETE FROM tempfile WHERE fid=?", undef, $fidid); }; - $self->condthrow; $self->enqueue_for_delete2($fidid, 0); $self->condthrow; } +# Only called from delete workers (after delete_fidid), +# this reduces client-visible latency from the queryworker +sub delete_fidid_enqueued { + my ($self, $fidid) = @_; + eval { $self->delete_checksum($fidid); }; + $self->condthrow; + eval { $self->dbh->do("DELETE FROM tempfile WHERE fid=?", undef, $fidid); }; + $self->condthrow; +} + sub delete_tempfile_row { my ($self, $fidid) = @_; my $rv = eval { $self->dbh->do("DELETE FROM tempfile WHERE fid=?", undef, $fidid); }; diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index 09207f18..7debf945 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -735,18 +735,6 @@ sub mark_fidid_unreachable { }; } -sub delete_fidid { - my ($self, $fidid) = @_; - $self->delete_checksum($fidid); - $self->condthrow; - $self->dbh->do("DELETE FROM file WHERE fid=?", undef, $fidid); - $self->condthrow; - $self->dbh->do("DELETE FROM tempfile WHERE fid=?", undef, $fidid); - $self->condthrow; - $self->enqueue_for_delete2($fidid, 0); - $self->condthrow; -} - sub replace_into_file { my $self = shift; my %arg = $self->_valid_params([qw(fidid dmid key 
length classid devcount)], @_); diff --git a/lib/MogileFS/Worker/Delete.pm b/lib/MogileFS/Worker/Delete.pm index 3d65b192..c70b8abf 100644 --- a/lib/MogileFS/Worker/Delete.pm +++ b/lib/MogileFS/Worker/Delete.pm @@ -163,6 +163,8 @@ sub process_deletes2 { next; } + $sto->delete_fidid_enqueued($fidid); + my @devids = $fid->devids; my %devids = map { $_ => 1 } @devids; From 81b0067f8ccdec7603dd3d5d9fea0076c08c4a98 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 12 Jan 2013 06:57:57 +0000 Subject: [PATCH 324/405] httpfile: avoid killing worker on down sidechannel The lack of a mogstored sidechannel listener should not be fatal to a replication worker (or any other worker). This bug only affects checksums users who misconfigure mogstored. --- lib/MogileFS/HTTPFile.pm | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index c7b0442a..01c3632e 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -13,6 +13,8 @@ my $user_agent; my %size_check_retry_after; # host => $hirestime. my %size_check_failcount; # host => $count. +my %sidechannel_nexterr; # host => next error log time + # create a new MogileFS::HTTPFile instance from a URL. not called # "new" because I don't want to imply that it's creating anything. 
sub at { @@ -158,9 +160,22 @@ sub digest_mgmt { # assuming the storage node can checksum at >=2MB/s, low expectations here my $response_timeout = $self->size / (2 * 1024 * 1024); + my $host = $self->{host}; retry: - $sock = $mogconn->sock($node_timeout) or return; + $sock = eval { $mogconn->sock($node_timeout) }; + if (defined $sock) { + delete $sidechannel_nexterr{$host}; + } else { + # avoid flooding logs with identical messages + my $err = $@; + my $next = $sidechannel_nexterr{$host} || 0; + my $now = time(); + return if $now < $next; + $sidechannel_nexterr{$host} = $now + 300; + return undeferr("sidechannel failure on $alg $uri: $err"); + } + $rv = send($sock, $req, 0); if ($! || $rv != $reqlen) { my $err = $!; From e5b0b0b91d30a0d6e85dfccc6a5256217369441b Mon Sep 17 00:00:00 2001 From: Gernot Vormayr Date: Sun, 13 Jan 2013 23:42:44 +0100 Subject: [PATCH 325/405] typo fix with root check in nginx module Signed-off-by: Eric Wong --- lib/Mogstored/HTTPServer/Nginx.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Mogstored/HTTPServer/Nginx.pm b/lib/Mogstored/HTTPServer/Nginx.pm index 65e597d5..c3023867 100644 --- a/lib/Mogstored/HTTPServer/Nginx.pm +++ b/lib/Mogstored/HTTPServer/Nginx.pm @@ -127,7 +127,7 @@ sub start { } } - my ($user) = $> == 1 ? "user root root;" : ""; + my $user = $> == 0 ? 
"user root root;" : ""; print $fh qq{ pid $pidFile; From eee892b4512c4984154f735f8d6bf7581654fcc9 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 17 Jan 2013 07:14:48 +0000 Subject: [PATCH 326/405] query: avoid redundant calls to err_line() Additionally, log redundant calls to err_line so we have a chance at figuring out what is causing redundant calls to err_line() --- lib/MogileFS/Worker/Query.pm | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 9fc8c6e2..7d9020a1 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -202,8 +202,7 @@ sub cmd_create_open { my $args = shift; # has to be filled out for some plugins - $args->{dmid} = $self->check_domain($args) - or return $self->err_line('domain_not_found'); + $args->{dmid} = $self->check_domain($args) or return; # first, pass this to a hook to do any manipulations needed eval {MogileFS::run_global_hook('cmd_create_open', $args)}; @@ -357,8 +356,7 @@ sub cmd_create_close { my $args = shift; # has to be filled out for some plugins - $args->{dmid} = $self->check_domain($args) - or return $self->err_line('domain_not_found'); + $args->{dmid} = $self->check_domain($args) or return; # call out to a hook that might modify the arguments for us MogileFS::run_global_hook('cmd_create_close', $args); @@ -488,8 +486,7 @@ sub cmd_updateclass { my MogileFS::Worker::Query $self = shift; my $args = shift; - $args->{dmid} = $self->check_domain($args) - or return $self->err_line('domain_not_found'); + $args->{dmid} = $self->check_domain($args) or return; # call out to a hook that might modify the arguments for us, abort if it tells us to my $rv = MogileFS::run_global_hook('cmd_updateclass', $args); @@ -523,8 +520,7 @@ sub cmd_delete { my $args = shift; # validate domain for plugins - $args->{dmid} = $self->check_domain($args) - or return $self->err_line('domain_not_found'); + $args->{dmid} = 
$self->check_domain($args) or return; # now invoke the plugin, abort if it tells us to my $rv = MogileFS::run_global_hook('cmd_delete', $args); @@ -563,8 +559,7 @@ sub cmd_file_debug { $fid = $sto->file_row_from_fidid($args->{fid}+0); } else { # If not, require dmid/dkey and pick up the fid from there. - $args->{dmid} = $self->check_domain($args) - or return $self->err_line('domain_not_found'); + $args->{dmid} = $self->check_domain($args) or return; return $self->err_line("no_key") unless valid_key($args->{key}); # now invoke the plugin, abort if it tells us to @@ -629,8 +624,7 @@ sub cmd_file_info { my $args = shift; # validate domain for plugins - $args->{dmid} = $self->check_domain($args) - or return $self->err_line('domain_not_found'); + $args->{dmid} = $self->check_domain($args) or return; # now invoke the plugin, abort if it tells us to my $rv = MogileFS::run_global_hook('cmd_file_info', $args); @@ -716,8 +710,7 @@ sub cmd_list_keys { my $args = shift; # validate parameters - my $dmid = $self->check_domain($args) - or return $self->err_line('domain_not_found'); + my $dmid = $self->check_domain($args) or return; my ($prefix, $after, $limit) = ($args->{prefix}, $args->{after}, $args->{limit}); if (defined $prefix and $prefix ne '') { @@ -758,8 +751,7 @@ sub cmd_rename { my $args = shift; # validate parameters - my $dmid = $self->check_domain($args) - or return $self->err_line('domain_not_found'); + my $dmid = $self->check_domain($args) or return; my ($fkey, $tkey) = ($args->{from_key}, $args->{to_key}); unless (valid_key($fkey) && valid_key($tkey)) { return $self->err_line("no_key"); @@ -1089,8 +1081,7 @@ sub cmd_get_paths { my $memcache_ttl = MogileFS::Config->server_setting_cached("memcache_ttl") || 3600; # validate domain for plugins - $args->{dmid} = $self->check_domain($args) - or return $self->err_line('domain_not_found'); + $args->{dmid} = $self->check_domain($args) or return; # now invoke the plugin, abort if it tells us to my $rv = 
MogileFS::run_global_hook('cmd_get_paths', $args); @@ -1268,8 +1259,7 @@ sub cmd_edit_file { my $memc = MogileFS::Config->memcache_client; # validate domain for plugins - $args->{dmid} = $self->check_domain($args) - or return $self->err_line('domain_not_found'); + $args->{dmid} = $self->check_domain($args) or return; # now invoke the plugin, abort if it tells us to my $rv = MogileFS::run_global_hook('cmd_get_paths', $args); @@ -1807,6 +1797,7 @@ sub err_line { $self->{querystarttime} = undef; } else { # don't send another ERR line if we already sent one + error("err_line called redundantly with $err_code ( " . eurl($err_text) . ")"); return 0; } From 3ff5eec5ed5e0ecb34be13ed5835678aba920670 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 17 Jan 2013 07:29:58 +0000 Subject: [PATCH 327/405] query: fix error reporting for _do_fsck_reset Failed set_server_settings calls just die with errors, so we need to track and log that to syslog. We'll also report we had a database error back to the client (but avoid propagating the exact error message, in case there is any sensitive information). --- lib/MogileFS/Worker/Query.pm | 39 +++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 7d9020a1..5d3bfc03 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -1509,7 +1509,7 @@ sub cmd_fsck_start { my $final_fid = $intss->("fsck_fid_at_end"); if (($checked_fid && $final_fid && $checked_fid >= $final_fid) || (!$final_fid && !$checked_fid)) { - $self->_do_fsck_reset or return $self->err_line; + $self->_do_fsck_reset or return $self->err_line("db"); } # set params for stats: @@ -1545,27 +1545,34 @@ sub cmd_fsck_reset { $sto->set_server_setting("fsck_highest_fid_checked", ($args->{startpos} ? 
$args->{startpos} : "0")); - $self->_do_fsck_reset or return $self->err_line; + $self->_do_fsck_reset or return $self->err_line("db"); return $self->ok_line; } sub _do_fsck_reset { my MogileFS::Worker::Query $self = shift; - my $sto = Mgd::get_store(); - $sto->set_server_setting("fsck_start_time", undef); - $sto->set_server_setting("fsck_stop_time", undef); - $sto->set_server_setting("fsck_fids_checked", 0); - $sto->set_server_setting("fsck_fid_at_end", $sto->max_fidid); - - # clear existing event counts summaries. - my $ss = $sto->server_settings; - foreach my $k (keys %$ss) { - next unless $k =~ /^fsck_sum_evcount_/; - $sto->set_server_setting($k, undef); + eval { + my $sto = Mgd::get_store(); + $sto->set_server_setting("fsck_start_time", undef); + $sto->set_server_setting("fsck_stop_time", undef); + $sto->set_server_setting("fsck_fids_checked", 0); + $sto->set_server_setting("fsck_fid_at_end", $sto->max_fidid); + + # clear existing event counts summaries. + my $ss = $sto->server_settings; + foreach my $k (keys %$ss) { + next unless $k =~ /^fsck_sum_evcount_/; + $sto->set_server_setting($k, undef); + } + my $logid = $sto->max_fsck_logid; + $sto->set_server_setting("fsck_start_maxlogid", $logid); + $sto->set_server_setting("fsck_logid_processed", $logid); + }; + if ($@) { + error("DB error in _do_fsck_reset: $@"); + return 0; } - my $logid = $sto->max_fsck_logid; - $sto->set_server_setting("fsck_start_maxlogid", $logid); - $sto->set_server_setting("fsck_logid_processed", $logid); + return 1; } sub cmd_fsck_clearlog { From 2aac660e72a13b886d7d7093e3793087f12647c0 Mon Sep 17 00:00:00 2001 From: Dave Lambley Date: Tue, 23 Oct 2012 10:24:24 +0000 Subject: [PATCH 328/405] debian/control: sysstat contains /usr/bin/iostat Signed-off-by: Eric Wong --- debian/control | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/control b/debian/control index 99148aa0..f0381d45 100644 --- a/debian/control +++ b/debian/control @@ -7,7 +7,7 @@ Standards-Version: 
3.6.1.0 Package: mogstored Architecture: all -Depends: ${perl:Depends}, libperlbal-perl (>= 1.79), libio-aio-perl, debconf (>= 1.2.0) +Depends: ${perl:Depends}, libperlbal-perl (>= 1.79), libio-aio-perl, debconf (>= 1.2.0), sysstat Suggests: mogilefs-utils Description: storage node daemon for MogileFS Mogstored is a storage node daemon for MogileFS, the open-source From c23cc6b0c29ee2a608f3ec2bc51a93d99e41efd1 Mon Sep 17 00:00:00 2001 From: Dave Lambley Date: Tue, 15 Jan 2013 11:56:32 +0000 Subject: [PATCH 329/405] Filter the devices before we do an expensive sort. Signed-off-by: Eric Wong --- lib/MogileFS/Worker/Query.pm | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 5d3bfc03..4593139d 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -248,16 +248,15 @@ sub cmd_create_open { $profstart->("find_deviceid"); - my @devices; - - unless (MogileFS::run_global_hook('cmd_create_open_order_devices', [Mgd::device_factory()->get_all], \@devices)) { - @devices = sort_devs_by_freespace(Mgd::device_factory()->get_all); - } - + my @devices = Mgd::device_factory()->get_all; if ($size) { @devices = grep { ($_->mb_free * 1024*1024) > $size } @devices; } + unless (MogileFS::run_global_hook('cmd_create_open_order_devices', [ @devices ], \@devices)) { + @devices = sort_devs_by_freespace(@devices); + } + # find suitable device(s) to put this file on. my @dests; # MogileFS::Device objects which are suitable From d8de2baa06cca3fd4059ad580b39541acea3e4d8 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 18 Jan 2013 09:42:44 +0000 Subject: [PATCH 330/405] mogstored: fix kqueue usage with daemonization Calling Mogstored::HTTPServer::Perlbal->start() creates a kqueue descriptor. kqueue descriptors are invalidated across fork, so we must avoid kqueue creation until after daemonization. 
We continue starting non-Perlbal HTTP servers before daemonization, as error reporting can be easier if stderr/stdout are not redirected to /dev/null. ref: http://code.google.com/p/mogilefs/issues/detail?id=72 Cc: to.my.trociny@gmail.com --- mogstored | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/mogstored b/mogstored index 608d786b..38d911c1 100755 --- a/mogstored +++ b/mogstored @@ -97,7 +97,13 @@ my $httpsrv = $httpsrv_class->new( maxconns => $max_conns, bin => $serverbin, ); -$httpsrv->start; + +# Configure Perlbal HTTP listener after daemonization since it can create a +# kqueue on *BSD. kqueue descriptors are automatically invalidated on fork(), +# making them unusable after daemonize. For non-Perlbal, starting the +# server before daemonization improves error reporting as daemonization +# redirects stdout/stderr to /dev/null. +$httpsrv->start if $server ne "perlbal"; if ($opt_daemonize) { $httpsrv->pre_daemonize; @@ -106,6 +112,9 @@ if ($opt_daemonize) { print "Running.\n"; } +# It is now safe for Perlbal to create a kqueue +$httpsrv->start if $server eq "perlbal"; + $httpsrv->post_daemonize; # kill our children processes on exit: From 857f2cdaea80c100803e5173337b8dfb183ec1f7 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 18 Jan 2013 23:44:32 +0000 Subject: [PATCH 331/405] postgres: remove Pg-specific create_class This version is still racy after several years. More importantly, it's missing the change in commit 5d01811ce47b888ca710985bf31ec45c4b7cadfa which allows the default class to be overridden. 
--- lib/MogileFS/Store/Postgres.pm | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index 7debf945..b057b967 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -537,33 +537,6 @@ sub _drop_db { # Data-access things we override # -------------------------------------------------------------------------- -# return new classid on success (non-zero integer), die on failure -# throw 'dup' on duplicate name -# TODO: add locks around entire table -sub create_class { - my ($self, $dmid, $classname) = @_; - my $dbh = $self->dbh; - - # get the max class id in this domain - my $maxid = $dbh->selectrow_array - ('SELECT MAX(classid) FROM class WHERE dmid = ?', undef, $dmid) || 0; - - # now insert the new class - my $rv = eval { - $dbh->do("INSERT INTO class (dmid, classid, classname, mindevcount) VALUES (?, ?, ?, ?)", - undef, $dmid, $maxid + 1, $classname, 2); - }; - if ($@ || $dbh->err) { - # first is error code for duplicates - if ($self->was_duplicate_error) { - throw("dup"); - } - } - return $maxid + 1 if $rv; - $self->condthrow; - die; -} - # returns 1 on success, 0 on duplicate key error, dies on exception # TODO: need a test to hit the duplicate name error condition sub rename_file { From d92e2a1f9af77c5592aa3be79e73b8ae71a7fd8a Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 18 Jan 2013 23:51:00 +0000 Subject: [PATCH 332/405] store: wrap create_class in a transaction to avoid races Race conditions in create_class are unlikely to be a problem in normal usage, but this will discourage code duplication which can lead to maintainability issues. 
--- lib/MogileFS/Store.pm | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 55910c07..d61df11c 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -881,32 +881,36 @@ sub class_has_files { # return new classid on success (non-zero integer), die on failure # throw 'dup' on duplicate name -# override this if you want a less racy version. sub create_class { my ($self, $dmid, $classname) = @_; my $dbh = $self->dbh; - # get the max class id in this domain - my $maxid = $dbh->selectrow_array - ('SELECT MAX(classid) FROM class WHERE dmid = ?', undef, $dmid) || 0; + my ($clsid, $rv); - my $clsid = $maxid + 1; - if ($classname eq 'default') { - $clsid = 0; - } - - # now insert the new class - my $rv = eval { - $dbh->do("INSERT INTO class (dmid, classid, classname, mindevcount) VALUES (?, ?, ?, ?)", - undef, $dmid, $clsid, $classname, 2); + eval { + $dbh->begin_work; + if ($classname eq 'default') { + $clsid = 0; + } else { + # get the max class id in this domain + my $maxid = $dbh->selectrow_array + ('SELECT MAX(classid) FROM class WHERE dmid = ?', undef, $dmid) || 0; + $clsid = $maxid + 1; + } + # now insert the new class + $rv = $dbh->do("INSERT INTO class (dmid, classid, classname, mindevcount) VALUES (?, ?, ?, ?)", + undef, $dmid, $clsid, $classname, 2); + $dbh->commit if $rv; }; if ($@ || $dbh->err) { if ($self->was_duplicate_error) { + # ensure we're not inside a transaction + if ($dbh->{AutoCommit} == 0) { eval { $dbh->rollback }; } throw("dup"); } } + $self->condthrow; # this will rollback on errors return $clsid if $rv; - $self->condthrow; die; } From 7eb1674aff2b5ec68038e6eb0a1d064dd9a5fad8 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 18 Jan 2013 22:23:44 +0000 Subject: [PATCH 333/405] domain removal also removes its default class A default class may enter the class table if its settings (e.g. mindevcount) are altered. 
The queryworker does not allow removing the default class, so the only way to remove it is to remove it when the domain goes away. --- lib/MogileFS/Store.pm | 30 +++++++++++++++++++++++++----- t/00-startup.t | 23 +++++++++++++++++++++++ 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index d61df11c..efe80ef9 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -853,9 +853,27 @@ sub delete_host { # return true if deleted, 0 if didn't exist, exception if error sub delete_domain { my ($self, $dmid) = @_; - throw("has_files") if $self->domain_has_files($dmid); - throw("has_classes") if $self->domain_has_classes($dmid); - return $self->dbh->do("DELETE FROM domain WHERE dmid = ?", undef, $dmid); + my ($err, $rv); + my $dbh = $self->dbh; + eval { + $dbh->begin_work; + if ($self->domain_has_files($dmid)) { + $err = "has_files"; + } elsif ($self->domain_has_classes($dmid)) { + $err = "has_classes"; + } else { + $rv = $dbh->do("DELETE FROM domain WHERE dmid = ?", undef, $dmid); + + # remove the "default" class if one was created (for mindevcount) + # this is currently the only way to delete the "default" class + $dbh->do("DELETE FROM class WHERE dmid = ? AND classid = 0", undef, $dmid); + $dbh->commit; + } + $dbh->rollback if $err; + }; + $self->condthrow; # will rollback on errors + throw($err) if $err; + return $rv; } sub domain_has_files { @@ -867,9 +885,11 @@ sub domain_has_files { sub domain_has_classes { my ($self, $dmid) = @_; - my $has_a_class = $self->dbh->selectrow_array('SELECT classid FROM class WHERE dmid = ? LIMIT 1', + # queryworker does not permit removing default class, so domain_has_classes + # should not register the default class + my $has_a_class = $self->dbh->selectrow_array('SELECT classid FROM class WHERE dmid = ? AND classid != 0 LIMIT 1', undef, $dmid); - return $has_a_class ? 
1 : 0; + return defined($has_a_class); } sub class_has_files { diff --git a/t/00-startup.t b/t/00-startup.t index c532b83d..32e4d62a 100644 --- a/t/00-startup.t +++ b/t/00-startup.t @@ -112,6 +112,29 @@ ok($tmptrack->mogadm("domain", "add", "todie"), "created todie domain"); ok($tmptrack->mogadm("domain", "delete", "todie"), "delete todie domain"); ok(!$tmptrack->mogadm("domain", "delete", "todie"), "didn't delete todie domain again"); +# ensure "default" class is removed when its domain is removed +{ + use Data::Dumper; + my $before = Dumper($sto->get_all_classes); + ok($tmptrack->mogadm("domain", "add", "def"), "created def domain"); + + my $dmid = $sto->get_domainid_by_name("def"); + ok(defined($dmid), "def dmid retrieved"); + + isnt($sto->domain_has_classes($dmid), "domain_has_classes does not show default class"); + ok($tmptrack->mogadm("class", "modify", "def", "default", "--mindevcount=3"), "modified default to have mindevcount=3"); + + my $classid = $sto->get_classid_by_name($dmid, "default"); + is($classid, 0, "default class has classid=0"); + isnt($sto->domain_has_classes($dmid), "domain_has_classes does not show default class"); + ok($tmptrack->mogadm("domain", "delete", "def"), "remove def domain"); + is($sto->get_domainid_by_name("def"), undef, "def nonexistent"); + is($sto->get_classid_by_name($dmid, "default"), undef, "def/default class nonexistent"); + + my $after = Dumper($sto->get_all_classes); + is($after, $before, "class listing is unchanged"); +} + ok($tmptrack->mogadm("domain", "add", "hasclass"), "created hasclass domain"); ok($tmptrack->mogadm("class", "add", "hasclass", "nodel"), "created nodel class"); ok(!$tmptrack->mogadm("domain", "delete", "hasclass"), "didn't delete hasclass domain"); From 61e1d3b1e4e8b759d8a8d8334bd47289c50a423f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 19 Jan 2013 12:27:03 +0000 Subject: [PATCH 334/405] tests: add "!want " helper This is used in several places, and will make code easier to maintain going 
forward. --- lib/MogileFS/Test.pm | 30 +++++++++++++++++++++++++++++- t/00-startup.t | 8 ++------ t/50-checksum.t | 32 ++++---------------------------- t/51-checksum_class_change.t | 29 +++-------------------------- 4 files changed, 38 insertions(+), 61 deletions(-) diff --git a/lib/MogileFS/Test.pm b/lib/MogileFS/Test.pm index 4270268b..9da487e8 100644 --- a/lib/MogileFS/Test.pm +++ b/lib/MogileFS/Test.pm @@ -9,7 +9,7 @@ use IO::Socket::INET; use MogileFS::Server; use base 'Exporter'; -our @EXPORT = qw(&find_mogclient_or_skip &temp_store &create_mogstored &create_temp_tracker &try_for); +our @EXPORT = qw(&find_mogclient_or_skip &temp_store &create_mogstored &create_temp_tracker &try_for &want); sub find_mogclient_or_skip { @@ -161,6 +161,34 @@ sub try_for { return 0; } +sub want { + my ($admin, $count, $jobclass) = @_; + my $req = "!want $count $jobclass\r\n"; + + syswrite($admin, $req) or die "syswrite: $!\n"; + + my $r = <$admin>; + if ($r =~ /Now desiring $count children doing '$jobclass'/ && <$admin> eq ".\r\n") { + my $rcount; + try_for(30, sub { + $rcount = -1; + syswrite($admin, "!jobs\r\n"); + MogileFS::Util::wait_for_readability(fileno($admin), 10); + while (1) { + my $line = <$admin>; + if ($line =~ /\A$jobclass count (\d+)/) { + $rcount = $1; + } + last if $line eq ".\r\n"; + } + $rcount == $count; + }); + return 1 if $rcount == $count; + die "got $jobclass count $rcount (expected=$count)\n"; + } + die "got bad response for $req: $r\n"; +} + ############################################################################ package ProcessHandle; sub new { diff --git a/t/00-startup.t b/t/00-startup.t index 32e4d62a..58c226da 100644 --- a/t/00-startup.t +++ b/t/00-startup.t @@ -84,9 +84,7 @@ ok($be->do_request("test", {}), "test ping again worked"); { my $c = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001', Timeout => 3); - $c->syswrite("!want 1 queryworker\r\n"); - my $res1 = <$c> . 
<$c>; - like($res1, qr/Now desiring 1 children doing 'queryworker'/, "set 1 queryworker"); + ok(want($c, 1, "queryworker"), "set 1 queryworker"); my $expect = "ERR no_domain No+domain+provided\r\n" x 2; @@ -103,9 +101,7 @@ ok($be->do_request("test", {}), "test ping again worked"); } while ($r && length($resp) != length($expect)); is($resp, $expect, "response matches expected"); - $c->syswrite("!want 2 queryworker\r\n"); - my $res2 = <$c> . <$c>; - like($res2, qr/Now desiring 2 children doing 'queryworker'/, "restored 2 queryworkers"); + ok(want($c, 2, "queryworker"), "restored 2 queryworkers"); } ok($tmptrack->mogadm("domain", "add", "todie"), "created todie domain"); diff --git a/t/50-checksum.t b/t/50-checksum.t index c9450b6f..8e7b0aa8 100644 --- a/t/50-checksum.t +++ b/t/50-checksum.t @@ -68,28 +68,6 @@ sub wait_for_monitor { $be->{timeout} = $was; } -sub stop_replicate { - my ($admin) = @_; - syswrite($admin, "!want 0 replicate\r\n"); # disable replication - ok(<$admin> =~ /Now desiring/ && <$admin> eq ".\r\n", "disabling replicate"); - - my $count; - try_for(30, sub { - $count = -1; - syswrite($admin, "!jobs\r\n"); - MogileFS::Util::wait_for_readability(fileno($admin), 10); - while (1) { - my $line = <$admin>; - if ($line =~ /\Areplicate count (\d+)/) { - $count = $1; - } - last if $line eq ".\r\n"; - } - $count == 0; - }); - is($count, 0, "replicate count is zero"); -} - sub full_fsck { my $tmptrack = shift; @@ -165,7 +143,7 @@ use Digest::MD5 qw/md5_hex/; { my $key = 'savecksum'; - stop_replicate($admin); + want($admin, 0, "replicate"); %opts = ( domain => "testdom", class => "2copies", key => $key ); $rv = $be->do_request("create_open", \%opts); @@ -190,8 +168,7 @@ use Digest::MD5 qw/md5_hex/; $info = $mogc->file_info($key); is($info->{checksum}, "MISSING", 'checksum is MISSING after delete'); - syswrite($admin, "!want 1 replicate\n"); # disable replication - ok(<$admin> =~ /Now desiring/ && <$admin> eq ".\r\n", "enabled replicate"); + want($admin, 1, 
"replicate"); # wait for replicate to recreate checksum try_for(30, sub { @@ -229,7 +206,7 @@ use Digest::MD5 qw/md5_hex/; { my $key = 'lazycksum'; - stop_replicate($admin); + want($admin, 0, "replicate"); my $fh = $mogc->new_file($key, "2copies"); print $fh "lazy"; @@ -237,8 +214,7 @@ use Digest::MD5 qw/md5_hex/; my $info = $mogc->file_info($key); is($info->{checksum}, 'MISSING', 'checksum is MISSING'); - syswrite($admin, "!want 1 replicate\n"); # disable replication - ok(<$admin> =~ /Now desiring/ && <$admin> eq ".\r\n", "enabled replicate"); + want($admin, 1, "replicate"); try_for(30, sub { @paths = $mogc->get_paths($key); diff --git a/t/51-checksum_class_change.t b/t/51-checksum_class_change.t index 27f4b161..cea0b63c 100644 --- a/t/51-checksum_class_change.t +++ b/t/51-checksum_class_change.t @@ -68,30 +68,8 @@ sub wait_for_monitor { $be->{timeout} = $was; } -sub stop_replicate { - my ($admin) = @_; - syswrite($admin, "!want 0 replicate\r\n"); # disable replication - ok(<$admin> =~ /Now desiring/ && <$admin> eq ".\r\n", "disabling replicate"); - - my $count; - try_for(30, sub { - $count = -1; - syswrite($admin, "!jobs\r\n"); - MogileFS::Util::wait_for_readability(fileno($admin), 10); - while (1) { - my $line = <$admin>; - if ($line =~ /\Areplicate count (\d+)/) { - $count = $1; - } - last if $line eq ".\r\n"; - } - $count == 0; - }); - is($count, 0, "replicate count is zero"); -} - wait_for_monitor($be); -stop_replicate($admin); +want($admin, 0, "replicate"); my ($req, $rv, %opts, @paths, @fsck_log); my $ua = LWP::UserAgent->new; @@ -129,8 +107,7 @@ my $key = "foo"; # replicate should work even if we have, but don't need a checksum anymore { - syswrite($admin, "!want 1 replicate\n"); - ok(<$admin> =~ /Now desiring/ && <$admin> eq ".\r\n", "enabled replicate"); + want($admin, 1, "replicate"); # wait for replicate to recreate checksum try_for(30, sub { @@ -138,7 +115,7 @@ my $key = "foo"; scalar(@paths) != 1; }); is(scalar(@paths), 2, "replicated successfully"); 
- stop_replicate($admin); + want($admin, 0, "replicate"); } # switch to SHA-1 checksums in "changer" class From 6c9f5f6b73d77a348172cbcc4f79245e02801c22 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 19 Jan 2013 12:38:08 +0000 Subject: [PATCH 335/405] reaper: ensure worker can be stopped via "!want" Now, all of our job classes may be controlled via "!want" --- lib/MogileFS/Worker/Reaper.pm | 2 +- t/00-startup.t | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Reaper.pm b/lib/MogileFS/Worker/Reaper.pm index 21c22696..b7884774 100644 --- a/lib/MogileFS/Worker/Reaper.pm +++ b/lib/MogileFS/Worker/Reaper.pm @@ -155,8 +155,8 @@ sub work { my $reap_check; $reap_check = sub { # get db and note we're starting a run + $self->parent_ping; debug("Reaper running; looking for dead devices"); - $self->still_alive; foreach my $dev (grep { $_->dstate->is_perm_dead } Mgd::device_factory()->get_all) diff --git a/t/00-startup.t b/t/00-startup.t index 58c226da..c780d34c 100644 --- a/t/00-startup.t +++ b/t/00-startup.t @@ -440,4 +440,17 @@ foreach my $t (qw(file file_on file_to_delete)) { ok($mogc->delete("0"), "delete 0 works"); } +# ensure all workers can be stopped/started +{ + my $c = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001', Timeout => 3); + my @jobs = qw(fsck queryworker delete replicate reaper monitor job_master); + + foreach my $j (@jobs) { + ok(want($c, 0, $j), "shut down all $j"); + } + foreach my $j (@jobs) { + ok(want($c, 1, $j), "start 1 $j"); + } +} + done_testing(); From e3f7601146881ca9fb49d242c6ec97f8cb5231a7 Mon Sep 17 00:00:00 2001 From: dormando Date: Sat, 2 Feb 2013 16:54:20 -0800 Subject: [PATCH 336/405] Serialize tempfile reaping prevents dogpiling on some slowish queries if you DB is hosed, or if you have many tempfile rows that need processing. 
--- lib/MogileFS/Worker/Delete.pm | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Delete.pm b/lib/MogileFS/Worker/Delete.pm index c70b8abf..a811e6e6 100644 --- a/lib/MogileFS/Worker/Delete.pm +++ b/lib/MogileFS/Worker/Delete.pm @@ -40,7 +40,13 @@ sub work { # call our workers, and have them do things # RETVAL = 0; I think I am done working for now # RETVAL = 1; I have more work to do - my $tempres = $self->process_tempfiles; + my $lock = 'mgfs:tempfiles'; + # This isn't something we need to wait for: just need to ensure one is. + my $tempres; + if (Mgd::get_store()->get_lock($lock, 0)) { + $tempres = $self->process_tempfiles; + Mgd::get_store()->release_lock($lock); + } my $delres; if (time() > $old_queue_check) { $self->reenqueue_delayed_deletes; From 221808c482aa9ed32fa08985432e0ab142ebd113 Mon Sep 17 00:00:00 2001 From: dormando Date: Sat, 2 Feb 2013 17:10:26 -0800 Subject: [PATCH 337/405] Checking in changes prior to tagging of version 2.67. Changelog diff is: diff --git a/CHANGES b/CHANGES index bd7e38a..f0f578c 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,36 @@ +2013-02-02: Release version 2.67 + + * Serialize tempfile reaping (dormando ) + + * reaper: ensure worker can be stopped via "!want" (Eric Wong ) + + * domain removal also removes its default class (Eric Wong ) + + * store: wrap create_class in a transaction to avoid races (Eric Wong ) + + * mogstored: fix kqueue usage with daemonization (Eric Wong ) + + * Filter the devices before we do an expensive sort. (Dave Lambley ) + + * httpfile: avoid killing worker on down sidechannel (Eric Wong ) + + * move checksum and tempfile delete to delete worker (Eric Wong ) + + * sqlite: use immediate transactions to prevent busy errors (Eric Wong ) + + * disable Nagle's algorithm for accepted clients (Eric Wong ) + + * ProcManager: favor using recently-used queryworkers (Eric Wong ) + + * Do both sorts in one method, to save on shared initialisation. 
(Dave Lambley ) + + * Pull out device sorting into it's own method for overriding. (Dave Lambley ) + + * Reseed the random number generator after forking. (Dave Lambley ) + + * support nginx server type in mogstored command line options (Daniel Frett ) + (also Gernot Vormayr , others) + 2013-01-06: Release version 2.66 * add a hook to cmd_updateclass (Daniel Frett ) --- CHANGES | 33 +++++++++++++++++++++++++++++++++ MANIFEST | 1 + lib/MogileFS/Server.pm | 2 +- 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index bd7e38a6..f0f578c8 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,36 @@ +2013-02-02: Release version 2.67 + + * Serialize tempfile reaping (dormando ) + + * reaper: ensure worker can be stopped via "!want" (Eric Wong ) + + * domain removal also removes its default class (Eric Wong ) + + * store: wrap create_class in a transaction to avoid races (Eric Wong ) + + * mogstored: fix kqueue usage with daemonization (Eric Wong ) + + * Filter the devices before we do an expensive sort. (Dave Lambley ) + + * httpfile: avoid killing worker on down sidechannel (Eric Wong ) + + * move checksum and tempfile delete to delete worker (Eric Wong ) + + * sqlite: use immediate transactions to prevent busy errors (Eric Wong ) + + * disable Nagle's algorithm for accepted clients (Eric Wong ) + + * ProcManager: favor using recently-used queryworkers (Eric Wong ) + + * Do both sorts in one method, to save on shared initialisation. (Dave Lambley ) + + * Pull out device sorting into it's own method for overriding. (Dave Lambley ) + + * Reseed the random number generator after forking. 
(Dave Lambley ) + + * support nginx server type in mogstored command line options (Daniel Frett ) + (also Gernot Vormayr , others) + 2013-01-06: Release version 2.66 * add a hook to cmd_updateclass (Daniel Frett ) diff --git a/MANIFEST b/MANIFEST index 2a5a61fb..ea8eadf2 100644 --- a/MANIFEST +++ b/MANIFEST @@ -55,6 +55,7 @@ lib/Mogstored/ChildProcess/IOStat.pm lib/Mogstored/FIDStatter.pm lib/Mogstored/HTTPServer.pm lib/Mogstored/HTTPServer/Apache.pm +lib/Mogstored/HTTPServer/Nginx.pm lib/Mogstored/HTTPServer/Lighttpd.pm lib/Mogstored/HTTPServer/Perlbal.pm lib/Mogstored/HTTPServer/None.pm diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index fa236020..0719b81c 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.66"; +$VERSION = "2.67"; =head1 NAME From 461b1e3bbac954e3c99a3ef33622a446341d092a Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 7 Feb 2013 03:36:07 +0000 Subject: [PATCH 338/405] list_keys: consistent ESCAPE usage across DB types Without specifying an ESCAPE character for LIKE queries, the '\' we use for escaping is treated as a literal and improperly matched keys with '\' in them under SQLite. This is only needed for SQLite, as the SQLite language reference makes no reference of a default ESCAPE character in http://www.sqlite.org/lang_expr.html ESCAPE is supported in MySQL and Postgres, too; and defaults to '\'. We specify it anyways to reduce code differences between different databases. 
Tested on MySQL 5.1.66 and Postgres 8.4.13 on Debian 6.0 and SQLite 3.7.13 on Debian 7.0 --- lib/MogileFS/Store.pm | 4 ++-- t/00-startup.t | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index efe80ef9..c5de6810 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1895,8 +1895,8 @@ sub get_keys_like { # now select out our keys return $self->dbh->selectcol_arrayref - ("SELECT dkey FROM file WHERE dmid = ? AND dkey $like ? AND dkey > ? " . - "ORDER BY dkey LIMIT $limit", undef, $dmid, $prefix, $after); + ("SELECT dkey FROM file WHERE dmid = ? AND dkey $like ? ESCAPE ? AND dkey > ? " . + "ORDER BY dkey LIMIT $limit", undef, $dmid, $prefix, "\\", $after); } sub get_keys_like_operator { return "LIKE"; } diff --git a/t/00-startup.t b/t/00-startup.t index c780d34c..148f1a0b 100644 --- a/t/00-startup.t +++ b/t/00-startup.t @@ -453,4 +453,27 @@ foreach my $t (qw(file file_on file_to_delete)) { } } +# list_keys with underscore +{ + my $c = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001', Timeout => 3); + + foreach my $k (qw(under_score under.score)) { + my $fh = $mogc->new_file($k, '1copy'); + ok($fh, "got filehandle for $k"); + ok(close($fh), "created file $k"); + } + + # we only have one queryworker from the previous test, so no need to + # clear cache twice and wait on monitor after "mogadm settings set" + foreach my $cslk (qw(on off)) { + ok($tmptrack->mogadm("settings", "set", "case_sensitive_list_keys", $cslk), "case_sensitive_list_keys = $cslk"); + ok($be->do_request("clear_cache", {}), "cleared_cache"); + my @l = $mogc->list_keys("under_"); + is_deeply(['under_score', [ 'under_score' ]], \@l, "list_keys handled underscore properly (case-sensitive $cslk)"); + } + + # restore default + $sto->set_server_setting('case_sensitive_list_keys', undef); +} + done_testing(); From 0889c79890ad75326030b389f1f18adf6aab2a15 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 7 
Feb 2013 04:27:20 +0000 Subject: [PATCH 339/405] list_keys: escape in Store, allow [%\\] as prefix If we support non-SQL DBs in the future, escaping rules could become store-specific, so Worker/Query is not the right place for it. Since '%' and '\' may be escaped just like any other character, we may also allow these characters as prefixes by properly escaping them. Tested on MySQL 5.1.66 and Postgres 8.4.13 on Debian 6.0 and SQLite 3.7.13 on Debian 7.0 --- lib/MogileFS/Store.pm | 4 ++++ lib/MogileFS/Worker/Query.pm | 8 -------- t/00-startup.t | 8 +++++++- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index c5de6810..656f9a62 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1888,6 +1888,10 @@ sub get_keys_like { # fix the input... prefix always ends with a % so that it works # in a LIKE call, and after is either blank or something $prefix = '' unless defined $prefix; + + # escape underscores, % and \ + $prefix =~ s/([%\\_])/\\$1/g; + $prefix .= '%'; $after = '' unless defined $after; diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 4593139d..4f425cbe 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -716,13 +716,6 @@ sub cmd_list_keys { # now validate that after matches prefix return $self->err_line('after_mismatch') if $after && $after !~ /^$prefix/; - - # verify there are no % or \ characters - return $self->err_line('invalid_chars') - if $prefix =~ /[%\\]/; - - # escape underscores - $prefix =~ s/_/\\_/g; } $limit ||= 1000; @@ -1772,7 +1765,6 @@ sub err_line { 'host_mismatch' => "The device specified doesn't belong to the host specified", 'host_not_empty' => "Unable to delete host; it contains devices still", 'host_not_found' => "Host not found", - 'invalid_chars' => "Patterns must not contain backslashes (\\) or percent signs (%).", 'invalid_checker_level' => "Checker level invalid. 
Please see documentation on this command.", 'invalid_mindevcount' => "The mindevcount must be at least 1", 'key_exists' => "Target key name already exists; can't overwrite.", diff --git a/t/00-startup.t b/t/00-startup.t index 148f1a0b..a867ea8a 100644 --- a/t/00-startup.t +++ b/t/00-startup.t @@ -457,7 +457,7 @@ foreach my $t (qw(file file_on file_to_delete)) { { my $c = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001', Timeout => 3); - foreach my $k (qw(under_score under.score)) { + foreach my $k (qw(under_score under.score per%cent back\\slash)) { my $fh = $mogc->new_file($k, '1copy'); ok($fh, "got filehandle for $k"); ok(close($fh), "created file $k"); @@ -470,6 +470,12 @@ foreach my $t (qw(file file_on file_to_delete)) { ok($be->do_request("clear_cache", {}), "cleared_cache"); my @l = $mogc->list_keys("under_"); is_deeply(['under_score', [ 'under_score' ]], \@l, "list_keys handled underscore properly (case-sensitive $cslk)"); + + @l = $mogc->list_keys("per%"); + is_deeply(['per%cent', [ 'per%cent' ]], \@l, "list_keys handled % properly (case-sensitive $cslk)"); + + @l = $mogc->list_keys("back\\"); + is_deeply(['back\slash', [ 'back\slash' ]], \@l, "list_keys handled \\ properly (case-sensitive $cslk)"); } # restore default From a92cfe7cd453d6c6948eef12ec9eb33004a77d46 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 12 Feb 2013 20:45:02 +0000 Subject: [PATCH 340/405] reaper: detect resurrection of "dead" devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although never officially supported in MogileFS, some users will manage to change "dead" devices to another state. When running fsck, this may cause the desperate search to continually fail as any files found and added to file_on table will just be reaped. 
Reported-by: Ask Bjørn Hansen Subject: fsck/FOND not adding a row to file_on Message-ID: --- lib/MogileFS/Worker/Reaper.pm | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/MogileFS/Worker/Reaper.pm b/lib/MogileFS/Worker/Reaper.pm index b7884774..17679cd1 100644 --- a/lib/MogileFS/Worker/Reaper.pm +++ b/lib/MogileFS/Worker/Reaper.pm @@ -90,6 +90,12 @@ sub reap_dev { $delay = undef; } + # user resurrected a "dead" device, not supported, really... + if (!$dev->dstate->is_perm_dead) { + Mgd::log("dev$devid is no longer dead to reaper"); + return; + } + # limit == 0 if we hit the queue size limit, we'll just reschedule if ($limit && $dev) { my $sto = Mgd::get_store(); From 47b710fdef40b6421ec8239951e0c7df92f5e245 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 12 Feb 2013 20:53:06 +0000 Subject: [PATCH 341/405] fsck: do not log FOND if note_on_device croaks note_on_device may croak, so avoid logging FOND until we've successfully called note_on_device to ensure the fsck log is consistent with what was done. --- lib/MogileFS/Worker/Fsck.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index 1c0b9eba..5e17b1f3 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -323,8 +323,8 @@ sub fix_fid { } # wow, we actually found it! - $fid->fsck_log(EV_FOUND_FID); $fid->note_on_device($good_devs[0]); # at least one good one. + $fid->fsck_log(EV_FOUND_FID); # fall through to check policy (which will most likely be # wrong, with only one file_on record...) and re-replicate From 32c23136374d47aa7e4e54868aa2c10fd6aed476 Mon Sep 17 00:00:00 2001 From: Dave Lambley Date: Wed, 13 Feb 2013 11:09:48 +0000 Subject: [PATCH 342/405] Tell the kernel we're doing sequential reads. 
[ew: squashed Dave's change to make IO::AIO optional] Signed-off-by: Eric Wong --- lib/Mogstored/SideChannelClient.pm | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/Mogstored/SideChannelClient.pm b/lib/Mogstored/SideChannelClient.pm index f11f0e83..f3dbab38 100644 --- a/lib/Mogstored/SideChannelClient.pm +++ b/lib/Mogstored/SideChannelClient.pm @@ -12,6 +12,10 @@ use Digest; use POSIX qw(O_RDONLY); use Mogstored::TaskQueue; +BEGIN { + eval { require IO::AIO; }; +} + # TODO: interface to make this tunable my %digest_queues; @@ -128,6 +132,9 @@ sub digest { Perlbal::AIO::aio_open("$path$uri", O_RDONLY, 0, sub { my $fh = shift; + eval { + IO::AIO::fadvise(fileno($fh), 0, 0, IO::AIO::FADV_SEQUENTIAL); + }; if ($self->{closed}) { CORE::close($fh) if $fh; From 534177ae007d66c86e54306a822d254a666cb7e9 Mon Sep 17 00:00:00 2001 From: Dave Lambley Date: Fri, 15 Feb 2013 15:28:25 +0000 Subject: [PATCH 343/405] Don't emit warnings if we're lacking the space free of a device. If we don't find space on devices with known space free, try the unknowns. Signed-off-by: Eric Wong --- lib/MogileFS/Worker/Query.pm | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 4593139d..517b8751 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -250,7 +250,14 @@ sub cmd_create_open { my @devices = Mgd::device_factory()->get_all; if ($size) { - @devices = grep { ($_->mb_free * 1024*1024) > $size } @devices; + # We first ignore all the devices with an unknown space free. + @devices = grep { length($_->mb_free) && ($_->mb_free * 1024*1024) > $size } @devices; + + # If we didn't find any, try all the devices with an unknown space free. + # This may happen if mogstored isn't running. 
+ if (!@devices) { + @devices = grep { !length($_->mb_free) } Mgd::device_factory()->get_all; + } } unless (MogileFS::run_global_hook('cmd_create_open_order_devices', [ @devices ], \@devices)) { From 93eac8826f38bc9799d7b810ee065c05afcc5151 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 23 Feb 2013 19:37:52 +0000 Subject: [PATCH 344/405] ProcManager: only log times_out_of_qworkers for new queries Logging times_out_of_qworkers in ProcessQueues is not accurate: recently-idle queryworkers may not be noticed and marked idle while ProcessQueues is looping and draining the @IdleQueryWorkers pool. Instead, only log times_out_of_qworkers when new requests are enqueued. --- lib/MogileFS/ProcManager.pm | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/MogileFS/ProcManager.pm b/lib/MogileFS/ProcManager.pm index 25d8259e..7b303650 100644 --- a/lib/MogileFS/ProcManager.pm +++ b/lib/MogileFS/ProcManager.pm @@ -415,6 +415,10 @@ sub EnqueueCommandRequest { ($client->peer_ip_string || '0.0.0.0') . " $line" ]; MogileFS::ProcManager->ProcessQueues; + if (@PendingQueries) { + # Don't like the name. Feel free to change if you find better. + $Stats{times_out_of_qworkers}++; + } } # puts a worker back in the queue, deleting any outstanding jobs in @@ -574,11 +578,6 @@ sub ProcessQueues { # 123-455 10.2.3.123 get_paths foo=bar&blah=bar\r\n $worker->write("$worker->{pid}-$worker->{reqid} $clref->[1]\r\n"); } - - if (@PendingQueries) { - # Don't like the name. Feel free to change if you find better. - $Stats{times_out_of_qworkers}++; - } } # send short descriptions of commands we support to the user From 21049f902e60b5c6c8ed5e600af3737ad1beb4dc Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 27 Feb 2013 10:11:55 +0000 Subject: [PATCH 345/405] mogstored: avoid bareword on IO::AIO w/o fadvise IO::AIO 2.4 on Debian stable lacks IO::AIO::FADV_SEQUENTIAL constant, causing compilation to fail on the bareword. 
Accessing the constant as a subroutine call (via "()") avoids the bareword and defers the error to runtime (which is trapped by eval). Tested under IO::AIO 2.4 on Debian stable and IO::AIO 4.15 on Debian testing (verified fadvise64() syscall under strace). --- lib/Mogstored/SideChannelClient.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Mogstored/SideChannelClient.pm b/lib/Mogstored/SideChannelClient.pm index f3dbab38..e68421bf 100644 --- a/lib/Mogstored/SideChannelClient.pm +++ b/lib/Mogstored/SideChannelClient.pm @@ -133,7 +133,7 @@ sub digest { Perlbal::AIO::aio_open("$path$uri", O_RDONLY, 0, sub { my $fh = shift; eval { - IO::AIO::fadvise(fileno($fh), 0, 0, IO::AIO::FADV_SEQUENTIAL); + IO::AIO::fadvise(fileno($fh), 0, 0, IO::AIO::FADV_SEQUENTIAL()); }; if ($self->{closed}) { From 947ae4b0f64218e6fba9dd7bd368f92e490c596d Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 23 Feb 2013 09:15:49 +0000 Subject: [PATCH 346/405] httpfile: correct FILE_MISSING check in digest_mgmt This is unlikely to be an issue in fsck, fsck checks file size/existence before digesting the file. 
--- lib/MogileFS/HTTPFile.pm | 7 +++---- t/40-httpfile.t | 4 ++++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index 01c3632e..4f1370f9 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -204,15 +204,14 @@ retry: } elsif ($rv =~ /^\Q$uri\E \Q$alg\E=([a-f0-9]{32,128})\r\n/) { my $hexdigest = $1; - if ($hexdigest eq FILE_MISSING) { - # FIXME, this could be another error like EMFILE/ENFILE - return FILE_MISSING; - } my $checksum = eval { MogileFS::Checksum->from_string(0, "$alg:$hexdigest") }; return undeferr("$alg failed for $uri: $@") if $@; return $checksum->{checksum}; + } elsif ($rv =~ /^\Q$uri\E \Q$alg\E=-1\r\n/) { + # FIXME, this could be another error like EMFILE/ENFILE + return FILE_MISSING; } elsif ($rv =~ /^ERROR /) { return; # old server, fallback to HTTP } diff --git a/t/40-httpfile.t b/t/40-httpfile.t index 7fed6d73..ddcfa51a 100644 --- a/t/40-httpfile.t +++ b/t/40-httpfile.t @@ -126,4 +126,8 @@ ok($size == $file->size, "big file size match $size"); ok($file->digest_mgmt('MD5', sub {}) eq $expect, "digest_mgmt('MD5') on big file"); ok($file->digest_http('MD5', sub {}) eq $expect, "digest_http('MD5') on big file"); +ok($file->delete, "file deleted"); +is(-1, $file->digest_http('MD5', sub {}), "digest_http detected missing"); +is(-1, $file->digest_mgmt('MD5', sub {}), "digest_mgmt detected missing"); + done_testing(); From 265ccefd3b3ef6e81161c6d2fba9dfbda4782560 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 23 Feb 2013 09:21:07 +0000 Subject: [PATCH 347/405] httpfile: correct timeouts for sidechannel digest fsck digests are deprioritized and serialized in mogstored, so it's nearly impossible to tell what's in the queue before our request. Since fsck is not latency critical, extend the timeout for that. We also need to account for normal seek/network latency for non-fsck digest requests, so add node_timeout to that. 
These bugs were mostly hidden since we are relying on <> to read, which may incur watchdog timeouts. --- lib/MogileFS/HTTPFile.pm | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index 4f1370f9..002841e3 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -149,6 +149,18 @@ sub digest_mgmt { my $rv; my $expiry; + # assuming the storage node can checksum at >=2MB/s, low expectations here + my $response_timeout = $self->size / (2 * 1024 * 1024); + if ($reason && $reason eq "fsck") { + # fsck has low priority in mogstored and is concurrency-limited, + # so this may be queued indefinitely behind digest requests for + # large files + $response_timeout += 3600; + } else { + # account for disk/network latency: + $response_timeout += $node_timeout; + } + $reason = defined($reason) ? " $reason" : ""; my $uri = $self->{uri}; my $req = "$alg $uri$reason\r\n"; @@ -158,8 +170,6 @@ sub digest_mgmt { # after sending a request my $retries = 2; - # assuming the storage node can checksum at >=2MB/s, low expectations here - my $response_timeout = $self->size / (2 * 1024 * 1024); my $host = $self->{host}; retry: From f369b15978a75f26537a5a3f4df2e5575139091b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 9 Mar 2013 07:46:23 +0000 Subject: [PATCH 348/405] fix "drain" handling used by MultipleHosts replpolicy MogileFS::DeviceState was never updated for the 2.40 drain changes. The broken-since-2.40 should_have_files sub caused ReplicationPolicy::MultipleHosts to overreplicate files, as it was not counting drain devices in the total disks check. 
Thanks to Tim on for reporting this to the mailing list at mogile@googlegroups.com --- lib/MogileFS/DeviceState.pm | 2 +- t/multiple-hosts-replpol.t | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/DeviceState.pm b/lib/MogileFS/DeviceState.pm index 6ac6add7..0b052a2a 100644 --- a/lib/MogileFS/DeviceState.pm +++ b/lib/MogileFS/DeviceState.pm @@ -44,7 +44,7 @@ sub can_delete_from { $_[0]->{write} } sub can_read_from { $_[0]->{read} } sub should_get_new_files { $_[0]->{new_files} } sub should_get_repl_files { $_[0]->{new_files} } -sub should_have_files { ! ($_[0]->{drain} || $_[0]->{dead}) } +sub should_have_files { ! $_[0]->{dead} } sub should_monitor { $_[0]->{monitor} } # named inconveniently so it's not taken to mean equalling string diff --git a/t/multiple-hosts-replpol.t b/t/multiple-hosts-replpol.t index dcb6c6f2..1d5545ca 100644 --- a/t/multiple-hosts-replpol.t +++ b/t/multiple-hosts-replpol.t @@ -63,6 +63,16 @@ is(rr("min=2 h1[d1=X d2=X] h2[d3=X d4=_]"), is(rr("min=3 h1[d1=_ d2=X] h2[d3=X d4=X]"), "all_good"); +# be happy with one drain copy +is(rr("min=2 h1[d3=X,drain d5=_] h2[d4=X d6=_]"), + "all_good", + "we are happy with one copy in a drain device"); + +# drain copy counts +is(rr("min=2 h1[d3=X,drain d5=X] h2[d4=X d6=_]"), + "too_good", + "the extra copy in drain leaves us too satisfied"); + sub rr { my ($state) = @_; my $ostate = $state; # original From 9bba043b8a595a8dd4fc41a2136bd4e377d00c2a Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 30 Mar 2013 01:29:02 +0000 Subject: [PATCH 349/405] fsck: this avoids redundant fsck log entries With many fsck workers and slow fsck (due to checksumming large files and/or high network latency), it may be possible for fsck workers to start working on the same FID without a lock.
ref: ML Subject: "FSCK Status/Log Entries" --- lib/MogileFS/Worker/Fsck.pm | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index 1c0b9eba..8904be65 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -108,6 +108,9 @@ sub check_fid { my ($self, $fid) = @_; my $fix = sub { + my ($reason, $recheck) = @_; + my $fixed; + # we cached devids without locking for the fast path, # ensure we get an up-to-date list in the slow path. $fid->forget_cached_devids; @@ -125,7 +128,16 @@ sub check_fid { return HANDLED; } - my $fixed = eval { $self->fix_fid($fid) }; + # we may have a lockless check which failed, retry the check + # with the lock and see if it succeeds here: + if ($recheck) { + $fixed = $recheck->(); + if (!$fixed) { + $fid->fsck_log($reason); + } + } + + $fixed ||= eval { $self->fix_fid($fid) }; my $err = $@; $sto->note_done_replicating($fid->id); if (! defined $fixed) { @@ -141,20 +153,18 @@ sub check_fid { # first obvious fucked-up case: no devids even presumed to exist. unless ($fid->devids) { - # first, log this weird condition. - $fid->fsck_log(EV_NO_PATHS); - - # weird, schedule a fix (which will do a search over all + # weird, recheck with a lock and then log it if it fails + # and attempt a fix (which will do a search over all # devices as a last-ditch effort to locate it) - return $fix->(); + return $fix->(EV_NO_PATHS, sub { $fid->devids }); } # first, see if the assumed devids meet the replication policy for # the fid's class. unless ($fid->devids_meet_policy) { - # log a policy violation - $fid->fsck_log(EV_POLICY_VIOLATION); - return $fix->(); + # recheck for policy violation under a lock, logging the violation + # if we failed. 
+ return $fix->(EV_POLICY_VIOLATION, sub { $fid->devids_meet_policy }); } # This is a simple fixup case From 2c52aedb532a98c68970ef50c2fc03b14b0a8634 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 1 Apr 2013 19:22:15 +0000 Subject: [PATCH 350/405] remove unused *::get_dbh subroutines These subroutines are unused. --- lib/MogileFS/Server.pm | 1 - lib/MogileFS/Worker.pm | 4 ---- 2 files changed, 5 deletions(-) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 0719b81c..2bc8ad77 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -219,7 +219,6 @@ sub validate_dbh { error("Error validating master DB: $@") if $@ && $had_dbh; return $dbh; } -sub get_dbh { return Mgd::get_store()->dbh } # the eventual replacement for callers asking for a dbh directly: # they'll ask for the current store, which is a database abstraction diff --git a/lib/MogileFS/Worker.pm b/lib/MogileFS/Worker.pm index d077df01..2af192e2 100644 --- a/lib/MogileFS/Worker.pm +++ b/lib/MogileFS/Worker.pm @@ -45,10 +45,6 @@ sub validate_dbh { return Mgd::validate_dbh(); } -sub get_dbh { - return Mgd::get_dbh(); -} - sub monitor_has_run { my $self = shift; return $self->{monitor_has_run} ? 1 : 0; From b87f38a91c9859e1ffdacdae2d28cb3ef0a2e0da Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 10 Jul 2013 02:16:35 +0000 Subject: [PATCH 351/405] ProcManager: log socketpair errors correctly Log the correct name of the failed function and the error string associated with the OS errno to aid in debugging. 
ML Ref: Date: Mon, 8 Jul 2013 17:14:40 -0700 (PDT) From: Tim To: mogile@googlegroups.com Message-Id: <12aa5269-7275-476b-86b6-863223005595@googlegroups.com> Subject: MogileFS crashes --- lib/MogileFS/ProcManager.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/ProcManager.pm b/lib/MogileFS/ProcManager.pm index 7b303650..0a1bf2a1 100644 --- a/lib/MogileFS/ProcManager.pm +++ b/lib/MogileFS/ProcManager.pm @@ -212,7 +212,7 @@ sub make_new_child { or return error("Can't block SIGINT for fork: $!"); socketpair(my $parents_ipc, my $childs_ipc, AF_UNIX, SOCK_STREAM, PF_UNSPEC ) - or die( "Sockpair failed" ); + or die( "socketpair failed: $!" ); return error("fork failed creating $job: $!") unless defined ($pid = fork); From d45bbf0544e8a91b69aa4a53ab312070ea7b6276 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 10 Jul 2013 02:10:12 +0000 Subject: [PATCH 352/405] httpfile: log mogstored I/O errors when checksumming Mogstored/SideChannelClient.pm may hit the following on I/O error: $self->write("ERR read $uri at $offset failed\r\n"); Be prepared to show that error to tracker watchers (and any other possible errors mogstored may return in the future). --- lib/MogileFS/HTTPFile.pm | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index 002841e3..1960a015 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -225,7 +225,9 @@ retry: } elsif ($rv =~ /^ERROR /) { return; # old server, fallback to HTTP } - return undeferr("mogstored failed to handle ($alg $uri)"); + + chomp($rv); + return undeferr("mogstored failed to handle ($alg $uri): $rv"); } sub digest_http { From 74844dc658d96b8d1281f62606a7721d30cb27b8 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 10 Jul 2013 02:14:57 +0000 Subject: [PATCH 353/405] add naive MultipleDevice replication policy This can be useful when MultipleHosts is too noisy when hosts differ greatly in storage capacity. 
The intended target of this policy is a low-priority backup cluster where a single host contains the bulk of the storage with a handful of random machines helping out. The MultipleHosts policy can be too noisy with log messages about running out of suggestions in this case. --- MANIFEST | 2 + .../ReplicationPolicy/MultipleDevices.pm | 108 ++++++++++++++ t/multiple-devices-replpol.t | 135 ++++++++++++++++++ 3 files changed, 245 insertions(+) create mode 100644 lib/MogileFS/ReplicationPolicy/MultipleDevices.pm create mode 100644 t/multiple-devices-replpol.t diff --git a/MANIFEST b/MANIFEST index ea8eadf2..099b3435 100644 --- a/MANIFEST +++ b/MANIFEST @@ -26,6 +26,7 @@ lib/MogileFS/Overview.pm lib/MogileFS/ProcManager.pm lib/MogileFS/Rebalance.pm lib/MogileFS/ReplicationPolicy.pm +lib/MogileFS/ReplicationPolicy/MultipleDevices.pm lib/MogileFS/ReplicationPolicy/MultipleHosts.pm lib/MogileFS/ReplicationPolicy/Union.pm lib/MogileFS/ReplicationRequest.pm @@ -82,6 +83,7 @@ t/70-reaper.t t/checksum.t t/fid-stat.t t/mogstored-shutdown.t +t/multiple-devices-replpol.t t/multiple-hosts-replpol.t t/replpolicy-parsing.t t/replpolicy.t diff --git a/lib/MogileFS/ReplicationPolicy/MultipleDevices.pm b/lib/MogileFS/ReplicationPolicy/MultipleDevices.pm new file mode 100644 index 00000000..afe4e2b2 --- /dev/null +++ b/lib/MogileFS/ReplicationPolicy/MultipleDevices.pm @@ -0,0 +1,108 @@ +package MogileFS::ReplicationPolicy::MultipleDevices; +use strict; +use base 'MogileFS::ReplicationPolicy'; +use MogileFS::Util qw(weighted_list); +use MogileFS::ReplicationRequest qw(ALL_GOOD TOO_GOOD TEMP_NO_ANSWER); + +sub new { + my ($class, $mindevcount) = @_; + return bless { + mindevcount => $mindevcount, + }, $class; +} + +sub new_from_policy_args { + my ($class, $argref) = @_; + # Note: "MultipleDevices()" is okay, in which case the 'mindevcount' + # on the class is used. 
(see below) + $$argref =~ s/^\s* \( \s* (\d*) \s* \) \s*//x + or die "$class failed to parse args: $$argref"; + return $class->new($1) +} + +sub mindevcount { $_[0]{mindevcount} } + +sub replicate_to { + my ($self, %args) = @_; + + my $fid = delete $args{fid}; # fid scalar to copy + my $on_devs = delete $args{on_devs}; # arrayref of device objects + my $all_devs = delete $args{all_devs}; # hashref of { devid => MogileFS::Device } + my $failed = delete $args{failed}; # hashref of { devid => 1 } of failed attempts this round + + # this is the per-class mindevcount (the old way), which is passed in automatically + # from the replication worker. but if we have our own configured mindevcount + # in class.replpolicy, like "MultipleHosts(3)", then we use the explicit one. otherwise, + # if blank, or zero, like "MultipleHosts()", then we use the builtin on + my $min = delete $args{min}; + $min = $self->{mindevcount} || $min; + + warn "Unknown parameters: " . join(", ", sort keys %args) if %args; + die "Missing parameters" unless $on_devs && $all_devs && $failed && $fid; + + # number of devices we currently live on + my $already_on = @$on_devs; + + return ALL_GOOD if $min == $already_on; + return TOO_GOOD if $already_on > $min; + + # total disks available which are candidates for having files on them + my $total_disks = scalar grep { $_->dstate->should_have_files } values %$all_devs; + + my %on_dev = map { $_->id => 1 } @$on_devs; + + # if we have two copies and that's all the disks there are + # anywhere, be happy enough, even if mindevcount is higher. in + # that case, when they add more disks later, they'll need to fsck + # to make files replicate more. + # this is here instead of above in case an over replication error causes + # the file to be on all disks (where more than necessary) + return ALL_GOOD if $already_on >= 2 && $already_on == $total_disks; + + my @all_dests = sort { + $b->percent_free <=> $a->percent_free + } grep { + ! $on_dev{$_->devid} && + ! 
$failed->{$_->devid} && + $_->should_get_replicated_files + } values %$all_devs; + + return TEMP_NO_ANSWER unless @all_dests; + + # Do this little dance to only weight-shuffle the top end of empty devices + @all_dests = weighted_list(map { [$_, 100 * $_->percent_free] } + splice(@all_dests, 0, 20)); + + return MogileFS::ReplicationRequest->new( + ideal => \@all_dests, + desperate => [], + ); +} + +1; + +# Local Variables: +# mode: perl +# c-basic-indent: 4 +# indent-tabs-mode: nil +# End: + +__END__ + +=head1 NAME + +MogileFS::ReplicationPolicy::MultipleDevices -- bare-bones replication policy + +=head1 RULES + +This policy only puts files onto different devices. This is intended to be a +quieter alternative to the default MultipleHosts replication policy when hosts +are heavily-imbalanced (one host has much more storage capacity than another). +This aims to avoid the noisy "policy_no_suggestions" log messages in clusters +where one large host contains the bulk of the storage. + +=head1 SEE ALSO + +L + +L diff --git a/t/multiple-devices-replpol.t b/t/multiple-devices-replpol.t new file mode 100644 index 00000000..42ac924a --- /dev/null +++ b/t/multiple-devices-replpol.t @@ -0,0 +1,135 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use Test::More; +use FindBin qw($Bin); + +use MogileFS::Server; +use MogileFS::Util qw(error_code); +use MogileFS::ReplicationPolicy::MultipleDevices; +use MogileFS::Test; + +# already good. +is(rr("min=2 h1[d1=X d2=_] h2[d3=X d4=_]"), + "all_good", "all good"); + +# need to get it onto host2... +is(rr("min=2 h1[d1=X d2=_] h2[d3=_ d4=_]"), + "ideal(2,3,4)", "any device will do"); + +# still needs to be on host2, even though 2 copies on host1 +is(rr("min=2 h1[d1=X d2=X] h2[d3=_ d4=_]"), + "all_good", "2 devices is enough"); + +# anywhere will do. 
(can happen on, say, rebalance) +is(rr("min=2 h1[d1=_ d2=_] h2[d3=_ d4=_]"), + "ideal(1,2,3,4)", "anywhere"); + +# should desperately try d2, since host2 is down +is(rr("min=2 h1[d1=X d2=_] h2=down[d3=_ d4=_]"), + "ideal(2)"); + +# should try host3, since host2 is down +is(rr("min=2 h1[d1=X d2=_] h2=down[d3=_ d4=_] h3[d5=_ d6=_]"), + "ideal(2,5,6)"); + +# need a copy on a non-dead disk on host1 +is(rr("min=2 h1[d1=_ d2=X,dead] h2=alive[d3=X d4=_]"), + "ideal(1,4)"); + +# this is an ideal move, since we only have 2 unique hosts: +is(rr("min=3 h1[d1=_ d2=X] h2[d3=X d4=_]"), + "ideal(1,4)"); + +# ... but if we have a 3rd host, well, it could be there +is(rr("min=3 h1[d1=_ d2=X] h2[d3=X d4=_] h3[d5=_]"), + "ideal(1,4,5)"); + +# ... unless that host is down, in which case it's back to 1/4, +# but desperately +is(rr("min=3 h1[d1=_ d2=X] h2[d3=X d4=_] h3=down[d5=_]"), + "ideal(1,4)"); + +# too good, uniq hosts > min +is(rr("min=2 h1[d1=X d2=_] h2[d3=X d4=_] h3[d5=X]"), + "too_good"); + +# too good, but but with uniq hosts == min +is(rr("min=2 h1[d1=X d2=X] h2[d3=X d4=_]"), + "too_good"); + +# be happy with 3 copies, even though two are on same host (that's our max unique hosts) +is(rr("min=3 h1[d1=_ d2=X] h2[d3=X d4=X]"), + "all_good"); + +# be happy with one drain copy +is(rr("min=2 h1[d3=X,drain d5=_] h2[d4=X d6=_]"), + "all_good", + "we are happy with one copy in a drain device"); + +# drain copy counts +is(rr("min=2 h1[d3=X,drain d5=X] h2[d4=X d6=_]"), + "too_good", + "the extra copy in drain leaves us too satisfied"); + +sub rr { + my ($state) = @_; + my $ostate = $state; # original + + MogileFS::Factory::Host->t_wipe; + MogileFS::Factory::Device->t_wipe; + MogileFS::Config->set_config_no_broadcast("min_free_space", 100); + my $hfac = MogileFS::Factory::Host->get_factory; + my $dfac = MogileFS::Factory::Device->get_factory; + + my $min = 2; + if ($state =~ s/^\bmin=(\d+)\b//) { + $min = $1; + } + + my $hosts = {}; + my $devs = {}; + my $on_devs = []; + + my 
$parse_error = sub { + die "Can't parse:\n $ostate\n" + }; + while ($state =~ s/\bh(\d+)(?:=(.+?))?\[(.+?)\]//) { + my ($n, $opts, $devstr) = ($1, $2, $3); + $opts ||= ""; + die "dup host $n" if $hosts->{$n}; + + my $h = $hosts->{$n} = $hfac->set({ hostid => $n, + status => ($opts || "alive"), observed_state => "reachable", + hostname => $n }); + + foreach my $ddecl (split(/\s+/, $devstr)) { + $ddecl =~ /^d(\d+)=([_X])(?:,(\w+))?$/ + or $parse_error->(); + my ($dn, $on_not, $status) = ($1, $2, $3); + die "dup device $dn" if $devs->{$dn}; + my $d = $devs->{$dn} = $dfac->set({ devid => $dn, + hostid => $h->id, observed_state => "writeable", + status => ($status || "alive"), mb_total => 1000, + mb_used => 100, }); + if ($on_not eq "X" && $d->dstate->should_have_files) { + push @$on_devs, $d; + } + } + } + $parse_error->() if $state =~ /\S/; + + my $polclass = "MogileFS::ReplicationPolicy::MultipleDevices"; + my $pol = $polclass->new; + my $rr = $pol->replicate_to( + fid => 1, + on_devs => $on_devs, + all_devs => $devs, + failed => {}, + min => $min, + ); + return $rr->t_as_string; +} + +done_testing(); From 1f8bc08ee21af6b5b4e29f5a0a2eeedb81eacbb2 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 31 Mar 2013 13:25:30 +0000 Subject: [PATCH 354/405] store: do not auto-reconnect while holding a lock Auto-reconnect is probably always unsafe while a holding a lock on all networked databases. While we do not use the builtin auto-reconnect functionality of MySQL, any auto-reconnect implementation should be affected by the same issues upon connection failure: https://dev.mysql.com/doc/refman/5.6/en/auto-reconnect.html With auto-reconnect, we could be operating under the false assumption we have a lock after the reconnect when we do not. For now, the easiest method of recovery is to just let the worker die while working on the current task and have the ProcManager restart it. 
--- lib/MogileFS/Store.pm | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 656f9a62..cdb5c31c 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -362,6 +362,11 @@ sub dbh { return if (defined $flag && $flag == 0);; } + # auto-reconnect is unsafe if we're holding a lock + if ($self->{lock_depth}) { + die "DB connection recovery unsafe, lock held: $self->{last_lock}"; + } + eval { local $SIG{ALRM} = sub { die "timeout\n" }; alarm($self->connect_timeout); From 8a5bad0de0baacacadd7c05b1efa73f67af4146c Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 1 Apr 2013 02:55:51 +0000 Subject: [PATCH 355/405] store: do not disconnect for max_handles while locked Dropping a connection while holding an advisory lock with MySQL or Postgres will cause a fatal error, so hold the connection open until the next time the dbh is requested without holding a lock. --- lib/MogileFS/Store.pm | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index cdb5c31c..c6a7cf74 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -347,8 +347,10 @@ sub dbh { if ($self->{recheck_done_gen} != $self->{recheck_req_gen}) { $self->{dbh} = undef unless $self->{dbh}->ping; # Handles a memory leak under Solaris/Postgres. 
+ # We may leak a little extra memory if we're holding a lock, + # since dropping a connection mid-lock is fatal $self->{dbh} = undef if ($self->{max_handles} && - $self->{handles_left}-- < 0); + $self->{handles_left}-- < 0 && !$self->{lock_depth}); $self->{recheck_done_gen} = $self->{recheck_req_gen}; } return $self->{dbh} if $self->{dbh}; From ad8de9fb7143ad2f51fb9151f14eb4a5a941e63d Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 1 Apr 2013 23:10:04 +0000 Subject: [PATCH 356/405] allow startup without job_master (and dependent workers) And when running without a job_master, do not spawn job_master-dependent workers (delete, fsck, replicate) as those workers will never get work. Running a queryworker+monitor in a remote datacenter makes sense with the MogileFS::Network plugins since the "create_close" size verification is faster and more reliable if the queryworker is in the same datacenter as the client, even if the master DB is in a remote datacenter. Being in a remote datacenter, (master)DB-intensive operations from delete, fsck and replicate workers can encounter high latency and an unreliable link, so admins may disable those workers in this situation. However, disabling delete, fsck, and replicate workers individually still allows the job_master to fill the initial queues (which is never processed) and prevent other trackers from processing items for 1000 seconds. Future commits may allow job_master to ignore certain queues if there are zero workers for that queue, but for now, stopping job_master entirely should be sufficient for most users with trackers in a different datacenter than the DB. P.S. It also makes sense to disable reaper in remote datacenters, too, but reaper does not rely on job_master. 
--- MANIFEST | 1 + lib/MogileFS/Config.pm | 3 ++ lib/MogileFS/ProcManager.pm | 34 ++++++++++++++++++++- t/00-startup.t | 8 +++-- t/80-job_master.t | 61 +++++++++++++++++++++++++++++++++++++ 5 files changed, 103 insertions(+), 4 deletions(-) create mode 100644 t/80-job_master.t diff --git a/MANIFEST b/MANIFEST index 099b3435..84c27a6c 100644 --- a/MANIFEST +++ b/MANIFEST @@ -80,6 +80,7 @@ t/50-checksum.t t/51-checksum_class_change.t t/60-fsck.t t/70-reaper.t +t/80-job_master.t t/checksum.t t/fid-stat.t t/mogstored-shutdown.t diff --git a/lib/MogileFS/Config.pm b/lib/MogileFS/Config.pm index 513dfea3..3e3b4d19 100644 --- a/lib/MogileFS/Config.pm +++ b/lib/MogileFS/Config.pm @@ -62,6 +62,7 @@ our ( $fsck_jobs, $reaper_jobs, $monitor_jobs, + $job_master, # boolean $max_handles, $min_free_space, $max_disk_age, @@ -105,6 +106,7 @@ sub load_config { 'repl_use_get_port=i' => \$cmdline{repl_use_get_port}, 'local_network=s' => \$cmdline{local_network}, 'mogstored_stream_port' => \$cmdline{mogstored_stream_port}, + 'job_master!' 
=> \$cmdline{job_master}, ); # warn of old/deprecated options @@ -154,6 +156,7 @@ sub load_config { $replicate_jobs = choose_value( 'replicate_jobs', 1 ); $fsck_jobs = choose_value( 'fsck_jobs', 1 ); $reaper_jobs = choose_value( 'reaper_jobs', 1 ); + $job_master = choose_value( 'job_master', 1 ); $monitor_jobs = choose_value( 'monitor_jobs', 1 ); $min_free_space = choose_value( 'min_free_space', 100 ); $max_disk_age = choose_value( 'max_disk_age', 5 ); diff --git a/lib/MogileFS/ProcManager.pm b/lib/MogileFS/ProcManager.pm index 0a1bf2a1..e8a06ff9 100644 --- a/lib/MogileFS/ProcManager.pm +++ b/lib/MogileFS/ProcManager.pm @@ -38,6 +38,10 @@ my %child = (); # pid -> MogileFS::Connection::Worker my %todie = (); # pid -> 1 (lists pids that we've asked to die) my %jobs = (); # jobname -> [ min, current ] +# we start job_master after monitor has run, but this avoid undef warning +# in job_needs_reduction +$jobs{job_master} = [ 0, 0 ]; + our $allkidsup = 0; # if true, all our kids are running. set to 0 when a kid dies. my @prefork_cleanup; # subrefs to run to clean stuff up before we make a new child @@ -48,6 +52,14 @@ my $monitor_good = 0; # ticked after monitor executes once after startup my $nowish; # updated approximately once per second +# it's pointless to spawn certain jobs without a job_master +my $want_job_master; +my %needs_job_master = ( + delete => 1, + fsck => 1, + replicate => 1, +); + sub push_pre_fork_cleanup { my ($class, $code) = @_; push @prefork_cleanup, $code; @@ -173,6 +185,10 @@ sub PostEventLoopChecker { # foreach job, fork enough children while (my ($job, $jobstat) = each %jobs) { + + # do not spawn job_master-dependent workers if we have no job_master + next if (! 
$want_job_master && $needs_job_master{$job}); + my $need = $jobstat->[0] - $jobstat->[1]; if ($need > 0) { error("Job $job has only $jobstat->[1], wants $jobstat->[0], making $need."); @@ -319,6 +335,8 @@ sub request_job_process { return 0 unless $class->is_valid_job($job); return 0 if ($job =~ /^(?:job_master|monitor)$/i && $n > 1); # ghetto special case + $want_job_master = $n if ($job eq "job_master"); + $jobs{$job}->[0] = $n; $allkidsup = 0; @@ -781,6 +799,16 @@ sub note_pending_death { # see if we should reduce the number of active children sub job_needs_reduction { my $job = shift; + my $q; + + # drop job_master-dependent workers if there is no job_master and no + # previously queued work + if (!$want_job_master && $needs_job_master{$job} + && $jobs{job_master}->[1] == 0 # check if job_master is really dead + && (($q = $pending_work{$job}) && !@$q || !$q)) { + return 1; + } + return $jobs{$job}->[0] < $jobs{$job}->[1]; } @@ -815,7 +843,11 @@ sub send_monitor_has_run { MogileFS::ProcManager->set_min_workers('replicate' => MogileFS->config('replicate_jobs')); MogileFS::ProcManager->set_min_workers('reaper' => MogileFS->config('reaper_jobs')); MogileFS::ProcManager->set_min_workers('fsck' => MogileFS->config('fsck_jobs')); - MogileFS::ProcManager->set_min_workers('job_master' => 1); + + # only one job_master at most + $want_job_master = !!MogileFS->config('job_master'); + MogileFS::ProcManager->set_min_workers('job_master' => $want_job_master); + $monitor_good = 1; $allkidsup = 0; } diff --git a/t/00-startup.t b/t/00-startup.t index a867ea8a..62be86f4 100644 --- a/t/00-startup.t +++ b/t/00-startup.t @@ -446,10 +446,12 @@ foreach my $t (qw(file file_on file_to_delete)) { my @jobs = qw(fsck queryworker delete replicate reaper monitor job_master); foreach my $j (@jobs) { - ok(want($c, 0, $j), "shut down all $j"); + ok(want($c, 0, $j), "shut down all $j"); } - foreach my $j (@jobs) { - ok(want($c, 1, $j), "start 1 $j"); + + # spawn job_master first to ensure 
delete/fsck/replicate can start + foreach my $j (reverse @jobs) { + ok(want($c, 1, $j), "start 1 $j"); } } diff --git a/t/80-job_master.t b/t/80-job_master.t new file mode 100644 index 00000000..30055dbf --- /dev/null +++ b/t/80-job_master.t @@ -0,0 +1,61 @@ +# -*-perl-*- +use strict; +use warnings; +use Test::More; +use FindBin qw($Bin); +use MogileFS::Server; +use MogileFS::Test; + +my @jm_jobs = qw(fsck delete replicate); +my $jobs; + +my $sto = eval { temp_store(); }; +if (!$sto) { + plan skip_all => "Can't create temporary test database: $@"; + exit 0; +} + +my $tmptrack = create_temp_tracker($sto, ["--no-job_master"]); +my $admin = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001'); +$admin or die "failed to create admin socket: $!"; + +sub jobs { + my ($admin) = @_; + my %ret; + + syswrite($admin, "!jobs\r\n"); + MogileFS::Util::wait_for_readability(fileno($admin), 10); + while (1) { + my $line = <$admin>; + $line =~ s/\r\n//; + last if $line eq "."; + $line =~ /^(\w+ \w+)\s*(.*)$/ or die "Failed to parse $line\n"; + $ret{$1} = $2; + } + return \%ret; +} + +ok(try_for(30, sub { jobs($admin)->{"queryworker count"} }), "wait for queryworker"); + +$jobs = jobs($admin); +foreach my $job (@jm_jobs) { + ok(!$jobs->{"$job count"}, "no $job workers"); +} + +# enable job master +want($admin, 1, "job_master"); + +ok(try_for(30, sub { jobs($admin)->{"queryworker count"} }), "wait for queryworker"); + +foreach my $job (@jm_jobs) { + ok(try_for(30, sub { jobs($admin)->{"$job count"} }), "wait for $job"); +} + +# disable job_master again +want($admin, 0, "job_master"); + +foreach my $job (@jm_jobs) { + ok(try_for(30, sub { !jobs($admin)->{"$job count"} }), "wait for $job to die"); +} + +done_testing(); From 18e8a6cf60dc8aacaa1d8eecfdd25af41c55f6d5 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 10 Jul 2013 22:39:24 +0000 Subject: [PATCH 357/405] increase receive buffers for large state events The monitor may send large state events for large installations with many 
hosts, devices, domains, or classes. The 1K default is too small and leads to excessive syscalls and string operations. This increases startup performance for a mock instance with 10K domains and 10K non-default classes. Using the parent_ping function and "No simple reply" warning as an informal benchmark, this change reduces the loop time from 12 to 10 loops. --- lib/MogileFS/Connection/Worker.pm | 10 ++++++++-- lib/MogileFS/Server.pm | 15 +++++++++++++++ lib/MogileFS/Worker.pm | 2 +- lib/MogileFS/Worker/Query.pm | 2 +- 4 files changed, 25 insertions(+), 4 deletions(-) diff --git a/lib/MogileFS/Connection/Worker.pm b/lib/MogileFS/Connection/Worker.pm index 7c053b1c..aef26d97 100644 --- a/lib/MogileFS/Connection/Worker.pm +++ b/lib/MogileFS/Connection/Worker.pm @@ -8,6 +8,7 @@ use base qw{Danga::Socket}; use fields ( 'read_buf', + 'read_size', # bigger for monitor 'job', 'pid', 'reqid', @@ -27,6 +28,7 @@ sub new { $self->{job} = undef; $self->{last_alive} = time(); $self->{known_state} = {}; + $self->{read_size} = 1024; return $self; } @@ -50,7 +52,7 @@ sub event_read { # if we read data from it, it's not blocked on something else. 
$self->note_alive; - my $bref = $self->read(1024); + my $bref = $self->read($self->{read_size}); return $self->close() unless defined $bref; $self->{read_buf} .= $$bref; @@ -73,7 +75,11 @@ sub event_write { sub job { my MogileFS::Connection::Worker $self = shift; return $self->{job} unless @_; - return $self->{job} = shift; + my $j = shift; + + # monitor may send huge state events (which we send to everyone else) + $self->{read_size} = Mgd::UNIX_RCVBUF_SIZE() if ($j eq 'monitor'); + $self->{job} = $j; } sub wants_todo { diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 2bc8ad77..b56ae706 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -203,6 +203,21 @@ use strict; use warnings; use MogileFS::Config; use MogileFS::Util qw(error fatal debug); # for others calling Mgd::foo() +use Socket qw(SOL_SOCKET SO_RCVBUF AF_UNIX SOCK_STREAM PF_UNSPEC); +BEGIN { + # detect the receive buffer size for Unix domain stream sockets, + # we assume the size is identical across all Unix domain sockets. + socketpair(my $s1, my $s2, AF_UNIX, SOCK_STREAM, PF_UNSPEC) + or die( "socketpair failed: $!" ); + + my $r = getsockopt($s1, SOL_SOCKET, SO_RCVBUF); + defined $r or die "getsockopt: $!"; + $r = unpack('i', $r) if defined $r; + $r = (defined $r && $r > 0) ? $r : 8192; + close $s1; + close $s2; + eval 'use constant UNIX_RCVBUF_SIZE => $r'; +} sub server { return MogileFS::Server->server; diff --git a/lib/MogileFS/Worker.pm b/lib/MogileFS/Worker.pm index 2af192e2..f32ff60b 100644 --- a/lib/MogileFS/Worker.pm +++ b/lib/MogileFS/Worker.pm @@ -131,7 +131,7 @@ sub read_from_parent { while (MogileFS::Util::wait_for_readability(fileno($psock), $timeout)) { $timeout = 0; # only wait on the timeout for the first read. my $buf; - my $rv = sysread($psock, $buf, 1024); + my $rv = sysread($psock, $buf, Mgd::UNIX_RCVBUF_SIZE()); if (!$rv) { if (defined $rv) { die "While reading pipe from parent, got EOF. Parent's gone. 
Quitting.\n"; diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 4ab5910c..185420c2 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -59,7 +59,7 @@ sub work { } my $newread; - my $rv = sysread($psock, $newread, 1024); + my $rv = sysread($psock, $newread, Mgd::UNIX_RCVBUF_SIZE()); if (!$rv) { if (defined $rv) { die "While reading pipe from parent, got EOF. Parent's gone. Quitting.\n"; From be0f167046bf4b0b695572bdd249ecac9d3a5054 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 11 Jul 2013 00:02:48 +0000 Subject: [PATCH 358/405] monitor: do not repeat join() for the debug statement This join() takes about 20ms on my mock instance with 10K domains and 10K classes, so it has some impact on startup performance. --- lib/MogileFS/Worker/Monitor.pm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index 7a547096..6e11f55b 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -195,9 +195,9 @@ sub send_events_to_parent { } return unless @flat; $self->{events} = []; - # TODO: Maybe wasting too much CPU building this debug line every time... - debug("sending state changes " . join(' ', ':monitor_events', @flat), 2); - $self->send_to_parent(join(' ', ':monitor_events', @flat)); + my $events = join(' ', ':monitor_events', @flat); + debug("sending state changes $events", 2); + $self->send_to_parent($events); } sub add_event { From b5db21172d0f26abe4f3b06d42d5008fcc1e3686 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 11 Jul 2013 00:14:42 +0000 Subject: [PATCH 359/405] do not replay :monitor_events back to the monitor This was excessively expensive for my instance with 10K domains and 10K classes. Applying state information without incurring IPC/scheduling costs allows non-monitor workers to start up within ~4 seconds of the monitor starting up. 
--- lib/MogileFS/ProcManager.pm | 7 ++++--- lib/MogileFS/Util.pm | 7 +++++-- lib/MogileFS/Worker/Monitor.pm | 14 ++++++++++---- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/lib/MogileFS/ProcManager.pm b/lib/MogileFS/ProcManager.pm index e8a06ff9..e8b9d9cf 100644 --- a/lib/MogileFS/ProcManager.pm +++ b/lib/MogileFS/ProcManager.pm @@ -705,10 +705,11 @@ sub HandleChildRequest { } elsif ($cmd =~ /^:monitor_events/) { # Apply the state locally, so when we fork children they have a # pre-parsed factory. - # Also replay the event back where it came, so the same mechanism - # applies and uses local changes. + # We do not replay the events back to where it came, since this + # severely impacts startup performance for instances with several + # thousand domains, classes, hosts or devices. apply_state_events(\$cmd); - MogileFS::ProcManager->send_to_all_children($cmd); + MogileFS::ProcManager->send_to_all_children($cmd, $child); } elsif ($cmd eq ":monitor_just_ran") { send_monitor_has_run($child); diff --git a/lib/MogileFS/Util.pm b/lib/MogileFS/Util.pm index 10eacfa8..2a0816d9 100644 --- a/lib/MogileFS/Util.pm +++ b/lib/MogileFS/Util.pm @@ -11,7 +11,7 @@ our @EXPORT_OK = qw( error undeferr debug fatal daemonize weighted_list every wait_for_readability wait_for_writeability throw error_code max min first okay_args device_state eurl decode_url_args - encode_url_args apply_state_events + encode_url_args apply_state_events apply_state_events_list ); # Applies monitor-job-supplied state events against the factory singletons. @@ -20,7 +20,10 @@ our @EXPORT_OK = qw( sub apply_state_events { my @events = split(/\s/, ${$_[0]}); shift @events; # pop the :monitor_events part + apply_state_events_list(@events); +} +sub apply_state_events_list { # This will needlessly fetch domain/class/host most of the time. # Maybe replace with something that "caches" factories? 
my %factories = ( 'domain' => MogileFS::Factory::Domain->get_factory, @@ -28,7 +31,7 @@ sub apply_state_events { 'host' => MogileFS::Factory::Host->get_factory, 'device' => MogileFS::Factory::Device->get_factory, ); - for my $ev (@events) { + for my $ev (@_) { my $args = decode_url_args($ev); my $mode = delete $args->{ev_mode}; my $type = delete $args->{ev_type}; diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index 6e11f55b..758fd135 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -17,7 +17,7 @@ use fields ( use Danga::Socket 1.56; use MogileFS::Config; -use MogileFS::Util qw(error debug encode_url_args); +use MogileFS::Util qw(error debug encode_url_args apply_state_events_list); use MogileFS::IOStatWatcher; use MogileFS::Server; use Digest::MD5 qw(md5_base64); @@ -195,9 +195,15 @@ sub send_events_to_parent { } return unless @flat; $self->{events} = []; - my $events = join(' ', ':monitor_events', @flat); - debug("sending state changes $events", 2); - $self->send_to_parent($events); + + { + # $events can be several MB, so let it go out-of-scope soon: + my $events = join(' ', ':monitor_events', @flat); + debug("sending state changes $events", 2); + $self->send_to_parent($events); + } + + apply_state_events_list(@flat); } sub add_event { From 08c83b1d9fef277eb97a6e8a0d9f52095f40191e Mon Sep 17 00:00:00 2001 From: dormando Date: Wed, 7 Aug 2013 21:05:13 -0700 Subject: [PATCH 360/405] Checking in changes prior to tagging of version 2.68. 
Changelog diff is: diff --git a/CHANGES b/CHANGES index f0f578c..b74f7f4 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,35 @@ +2013-08-07: Release version 2.68 + + * optimize monitor worker for large installs (Eric Wong ) + + * allow startup without job_master (and dependent workers) (Eric Wong ) + + * store: do not disconnect for max_handles while locked (Eric Wong ) + + * store: do not auto-reconnect while holding a lock (Eric Wong ) + + * add naive MultipleDevice replication policy (Eric Wong ) + + * httpfile: log mogstored I/O errors when checksumming (Eric Wong ) + + * ProcManager: log socketpair errors correctly (Eric Wong ) + + * fix "drain" handling used by MultipleHosts replpolicy (Eric Wong ) + + * httpfile: correct timeouts for sidechannel digest (Eric Wong ) + + * httpfile: correct FILE_MISSING check in digest_mgmt (Eric Wong ) + + * mogstored: avoid bareword on IO::AIO w/o fadvise (Eric Wong ) + + * ProcManager: only log times_out_of_qworkers for new queries (Eric Wong ) + + * Don't emit warnings if we're lacking the space free of a device. If we don't find space on devices with known space free, try the unknowns. 
(Dave Lambley ) + + * list_keys: escape in Store, allow [%\\] as prefix (Eric Wong ) + + * list_keys: consistent ESCAPE usage across DB types (Eric Wong ) + 2013-02-02: Release version 2.67 * Serialize tempfile reaping (dormando ) --- CHANGES | 32 ++++++++++++++++++++++++++++++++ lib/MogileFS/Server.pm | 2 +- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index f0f578c8..b74f7f4e 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,35 @@ +2013-08-07: Release version 2.68 + + * optimize monitor worker for large installs (Eric Wong ) + + * allow startup without job_master (and dependent workers) (Eric Wong ) + + * store: do not disconnect for max_handles while locked (Eric Wong ) + + * store: do not auto-reconnect while holding a lock (Eric Wong ) + + * add naive MultipleDevice replication policy (Eric Wong ) + + * httpfile: log mogstored I/O errors when checksumming (Eric Wong ) + + * ProcManager: log socketpair errors correctly (Eric Wong ) + + * fix "drain" handling used by MultipleHosts replpolicy (Eric Wong ) + + * httpfile: correct timeouts for sidechannel digest (Eric Wong ) + + * httpfile: correct FILE_MISSING check in digest_mgmt (Eric Wong ) + + * mogstored: avoid bareword on IO::AIO w/o fadvise (Eric Wong ) + + * ProcManager: only log times_out_of_qworkers for new queries (Eric Wong ) + + * Don't emit warnings if we're lacking the space free of a device. If we don't find space on devices with known space free, try the unknowns. 
(Dave Lambley ) + + * list_keys: escape in Store, allow [%\\] as prefix (Eric Wong ) + + * list_keys: consistent ESCAPE usage across DB types (Eric Wong ) + 2013-02-02: Release version 2.67 * Serialize tempfile reaping (dormando ) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index b56ae706..45aedba4 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.67"; +$VERSION = "2.68"; =head1 NAME From d9d3a9bf6d1897d52aa0390ba105c7ae74cb9b86 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 4 Sep 2012 23:21:40 +0000 Subject: [PATCH 361/405] move Danga::Socket->Reset to ProcManager We will be using Danga::Socket in more (possibly all) workers, not just the Monitor and Reaper. Resetting in workers that do not use Danga::Socket is harmless and will not allocate epoll/kqueue descriptors until the worker actually uses Danga::Socket. --- lib/MogileFS/ProcManager.pm | 5 +++-- lib/MogileFS/Worker/Monitor.pm | 4 ---- lib/MogileFS/Worker/Reaper.pm | 4 ---- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/lib/MogileFS/ProcManager.pm b/lib/MogileFS/ProcManager.pm index e8b9d9cf..032cb561 100644 --- a/lib/MogileFS/ProcManager.pm +++ b/lib/MogileFS/ProcManager.pm @@ -363,8 +363,9 @@ sub SetAsChild { %idle_workers = (); %pending_work = (); - # and now kill off our event loop so that we don't waste time - Danga::Socket->SetPostLoopCallback(sub { return 0; }); + # we just forked from our parent process, also using Danga::Socket, + # so we need to lose all that state and start afresh. + Danga::Socket->Reset; } # called when a child has died. 
a child is someone doing a job for us, diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index 758fd135..beb18f6b 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -117,10 +117,6 @@ sub usage_refresh { sub work { my $self = shift; - # we just forked from our parent process, also using Danga::Socket, - # so we need to lose all that state and start afresh. - Danga::Socket->Reset; - my $iow = $self->{iow}; $iow->on_stats(sub { my ($hostname, $stats) = @_; diff --git a/lib/MogileFS/Worker/Reaper.pm b/lib/MogileFS/Worker/Reaper.pm index b7884774..528a5cbc 100644 --- a/lib/MogileFS/Worker/Reaper.pm +++ b/lib/MogileFS/Worker/Reaper.pm @@ -144,10 +144,6 @@ sub reap_dev_backoff_delay { sub work { my $self = shift; - # we just forked from our parent process, also using Danga::Socket, - # so we need to lose all that state and start afresh. - Danga::Socket->Reset; - # ensure we get monitor updates Danga::Socket->AddOtherFds($self->psock_fd, sub{ $self->read_from_parent }); From d8cd470b66ba015b0e55706a3d3ec5fbddc1c6e0 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 4 Sep 2012 23:21:40 +0000 Subject: [PATCH 362/405] monitor: refactor/rewrite to use new async API In order to migrate to the upcoming Danga::Socket-based HTTP API, we'll first refactor monitor to use the new API (but preserve LWP usage behind-the-scenes). DEBUG=1 users will see the elapsed time for all device refreshes each time monitor runs. While we're at it, also guard against race conditions on the PUT/GET test by double-checking on failure. (A long-standing TODO item) also squashed the following commit: use conn_timeout in monitor, node_timeout in other workers This matches the behavior in MogileFS:Server 2.65. It makes sense to use a different, lower timeout in monitor to quickly detect overloaded nodes and avoid propagating their liveness for a monitoring period. 
It also makes sense to use a higher value for node_timeout in other workers since other actions are less fault-tolerant. For example, a timed-out size check in create_close may cause a client to eventually reupload the file, creating even more load on the cluster. --- MANIFEST | 1 + lib/MogileFS/Connection/Parent.pm | 35 +++ lib/MogileFS/Host.pm | 48 ++++ lib/MogileFS/Worker.pm | 5 + lib/MogileFS/Worker/Monitor.pm | 462 +++++++++++++++++++++--------- 5 files changed, 410 insertions(+), 141 deletions(-) create mode 100644 lib/MogileFS/Connection/Parent.pm diff --git a/MANIFEST b/MANIFEST index 84c27a6c..e7ba10bc 100644 --- a/MANIFEST +++ b/MANIFEST @@ -12,6 +12,7 @@ lib/MogileFS/Class.pm lib/MogileFS/Config.pm lib/MogileFS/Connection/Client.pm lib/MogileFS/Connection/Mogstored.pm +lib/MogileFS/Connection/Parent.pm lib/MogileFS/Connection/Worker.pm lib/MogileFS/DevFID.pm lib/MogileFS/Device.pm diff --git a/lib/MogileFS/Connection/Parent.pm b/lib/MogileFS/Connection/Parent.pm new file mode 100644 index 00000000..d28ffa65 --- /dev/null +++ b/lib/MogileFS/Connection/Parent.pm @@ -0,0 +1,35 @@ +package MogileFS::Connection::Parent; +# maintains a connection in a worker process to the parent ProcManager process +# Only used by workers that use the Danga::Socket->EventLoop internally +# currently only Monitor +use warnings; +use strict; +use Danga::Socket (); +use base qw{Danga::Socket}; +use fields qw(worker); + +sub new { + my ($self, $worker) = @_; + $self = fields::new($self) unless ref $self; + $self->SUPER::new($worker->psock); + $self->{worker} = $worker; + + return $self; +} + +sub ping { + my ($self) = @_; + + $self->write(":ping\r\n"); +} + +sub event_read { + my ($self) = @_; + + $self->{worker}->read_from_parent; +} + +sub event_hup { $_[0]->close } +sub event_err { $_[0]->close } + +1; diff --git a/lib/MogileFS/Host.pm b/lib/MogileFS/Host.pm index 49964bc1..6b0c5601 100644 --- a/lib/MogileFS/Host.pm +++ b/lib/MogileFS/Host.pm @@ -6,6 +6,15 @@ use Net::Netmask; 
use Carp qw(croak); use MogileFS::Connection::Mogstored; +# temporary... +use LWP::UserAgent; +sub ua { + LWP::UserAgent->new( + timeout => MogileFS::Config->config('conn_timeout') || 2, + keep_alive => 20, + ); +} + =head1 MogileFS::Host - host class @@ -100,4 +109,43 @@ sub sidechannel_port { MogileFS->config("mogstored_stream_port"); } +sub _http_req { + my ($self, $method, $uri, $opts, $cb) = @_; + + $opts ||= {}; + my $h = $opts->{headers} || {}; + my $req = HTTP::Request->new($method, $uri, [ %$h ]); + $req->content($opts->{content}) if exists $opts->{content}; + Danga::Socket->AddTimer(0, sub { + my $response = $self->ua->request($req); + Danga::Socket->AddTimer(0, sub { $cb->($response) }); + }); +} + +# FIXME: make async +sub http_get { + my ($self, $method, $uri, $opts, $cb) = @_; + + if ($method !~ /\A(?:GET|HEAD)\z/) { + die "Bad method for HTTP get port: $method"; + } + + # convert path-only URL to full URL + if ($uri =~ m{\A/}) { + $uri = 'http://' . $self->ip . ':' . $self->http_get_port . $uri; + } + $self->_http_req($method, $uri, $opts, $cb); +} + +# FIXME: make async +sub http { + my ($self, $method, $uri, $opts, $cb) = @_; + + # convert path-only URL to full URL + if ($uri =~ m{\A/}) { + $uri = 'http://' . $self->ip . ':' . $self->http_port . $uri; + } + $self->_http_req($method, $uri, $opts, $cb); +} + 1; diff --git a/lib/MogileFS/Worker.pm b/lib/MogileFS/Worker.pm index f32ff60b..f3a74dff 100644 --- a/lib/MogileFS/Worker.pm +++ b/lib/MogileFS/Worker.pm @@ -41,6 +41,11 @@ sub psock_fd { return fileno($self->{psock}); } +sub psock { + my $self = shift; + return $self->{psock}; +} + sub validate_dbh { return Mgd::validate_dbh(); } diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index beb18f6b..4653d886 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -5,14 +5,21 @@ use warnings; use base 'MogileFS::Worker'; use fields ( 'last_test_write', # devid -> time. 
time we last tried writing to a device. + 'monitor_start', # main monitor start time 'skip_host', # hostid -> 1 if already noted dead (reset every loop) 'seen_hosts', # IP -> 1 (reset every loop) - 'ua', # LWP::UserAgent for checking usage files 'iow', # MogileFS::IOStatWatcher object 'prev_data', # DB data from previous run 'devutil', # Running tally of device utilization 'events', # Queue of state events + 'refresh_state', # devid -> { used, total, callbacks }, temporary data in each refresh run 'have_masterdb', # Hint flag for if the master DB is available + 'updateable_devices', # devid -> Device, avoids device table updates + 'parent', # socketpair to parent process + 'refresh_pending', # set if there was a manually-requested refresh + 'db_monitor_ran', # We announce "monitor_just_ran" every time the + # device checks are run, but only if the DB has + # been checked inbetween. ); use Danga::Socket 1.56; @@ -20,6 +27,7 @@ use MogileFS::Config; use MogileFS::Util qw(error debug encode_url_args apply_state_events_list); use MogileFS::IOStatWatcher; use MogileFS::Server; +use MogileFS::Connection::Parent; use Digest::MD5 qw(md5_base64); use constant UPDATE_DB_EVERY => 15; @@ -33,7 +41,7 @@ sub new { $self->{iow} = MogileFS::IOStatWatcher->new; $self->{prev_data} = { domain => {}, class => {}, host => {}, device => {} }; - $self->{devutil} = { cur => {}, prev => {} }; + $self->{devutil} = { cur => {}, prev => {}, tmp => {} }; $self->{events} = []; $self->{have_masterdb} = 0; return $self; @@ -43,9 +51,16 @@ sub watchdog_timeout { 30; } +# returns 1 if a DB update was attempted +# returns 0 immediately if the (device) monitor is already running sub cache_refresh { my $self = shift; + if ($self->{refresh_state}) { + debug("Monitor run in progress, will not check for DB updates"); + return 0; + } + debug("Monitor running; checking DB for updates"); # "Fix" our local cache of this flag, so we always check the master DB. 
MogileFS::Config->cache_server_setting('_master_db_alive', 1); @@ -67,56 +82,113 @@ sub cache_refresh { } $self->send_events_to_parent; + $self->{db_monitor_ran} = 1; + + return 1; } sub usage_refresh { - my $self = shift; + my ($self) = @_; + + # prevent concurrent refresh + return if $self->{refresh_state}; debug("Monitor running; scanning usage files"); + + $self->{refresh_state} = {}; # devid -> ... + $self->{monitor_start} = Time::HiRes::time(); + my $have_dbh = $self->validate_dbh; - my $updateable_devices; # See if we should be allowed to update the device table rows. if ($have_dbh && Mgd::get_store()->get_lock('mgfs:device_update', 0)) { # Fetch the freshlist list of entries, to avoid excessive writes. - $updateable_devices = { map { $_->{devid} => $_ } + $self->{updateable_devices} = { map { $_->{devid} => $_ } Mgd::get_store()->get_all_devices }; + } else { + $self->{updateable_devices} = undef; } $self->{skip_host} = {}; # hostid -> 1 if already noted dead. $self->{seen_hosts} = {}; # IP -> 1 my $dev_factory = MogileFS::Factory::Device->get_factory(); + my $devutil = $self->{devutil}; - my $cur_iow = {}; - # Run check_devices to test host/devs. diff against old values. + $devutil->{tmp} = {}; + # kick off check_device to test host/devs. diff against old values. for my $dev ($dev_factory->get_all) { if (my $state = $self->is_iow_diff($dev)) { $self->state_event('device', $dev->id, {utilization => $state}); } - $cur_iow->{$dev->id} = $self->{devutil}->{cur}->{$dev->id}; - next if $self->{skip_host}{$dev->hostid}; - $self->check_device($dev, $have_dbh, $updateable_devices) - if $dev->can_read_from; - $self->still_alive; # Ping parent if needed so we don't time out - # given lots of devices. 
+ $devutil->{tmp}->{$dev->id} = $devutil->{cur}->{$dev->id}; + + $dev->can_read_from or next; + $self->check_device_begin($dev); } + # we're done if we didn't schedule any work + $self->usage_refresh_done unless keys %{$self->{refresh_state}}; +} + +sub usage_refresh_done { + my ($self) = @_; - if ($have_dbh && $updateable_devices) { + if ($self->{updateable_devices}) { Mgd::get_store()->release_lock('mgfs:device_update'); + $self->{updateable_devices} = undef; } - $self->{devutil}->{prev} = $cur_iow; + $self->{devutil}->{prev} = $self->{devutil}->{tmp}; # Set the IOWatcher hosts (once old monitor code has been disabled) $self->send_events_to_parent; $self->{iow}->set_hosts(keys %{$self->{seen_hosts}}); + + foreach my $devid (keys %{$self->{refresh_state}}) { + error("device check incomplete for dev$devid"); + } + + my $start = delete $self->{monitor_start}; + my $elapsed = Time::HiRes::time() - $start; + debug("device refresh finished after $elapsed"); + + $self->{refresh_state} = undef; + my $pending_since = $self->{refresh_pending}; + + # schedule another usage_refresh immediately if somebody requested it + # Don't announce :monitor_just_ran if somebody requested a refresh + # while we were running, we could've been refreshing on a stale DB + if ($pending_since && $pending_since > $start) { + # using AddTimer to schedule the refresh to avoid stack overflow + # since usage_refresh can call usage_refresh_done directly if + # there are no devices + Danga::Socket->AddTimer(0, sub { + $self->cache_refresh; + $self->usage_refresh; + }); + } + + # announce we're done if we ran on schedule, or we had a + # forced refresh that was requested before we started. 
+ if (!$pending_since || $pending_since <= $start) { + # totally done refreshing, accept manual refresh requests again + $self->{parent}->watch_read(1); + delete $self->{refresh_pending}; + if (delete $self->{db_monitor_ran} || $pending_since) { + $self->send_to_parent(":monitor_just_ran"); + } + } } sub work { my $self = shift; + # It makes sense to have monitor use a shorter timeout + # (conn_timeout) across the board to skip slow hosts. Other workers + # are less tolerant, and may use a higher value in node_timeout. + MogileFS::Config->set_config_no_broadcast("node_timeout", MogileFS::Config->config("conn_timeout")); + my $iow = $self->{iow}; $iow->on_stats(sub { my ($hostname, $stats) = @_; @@ -129,15 +201,17 @@ sub work { } }); - # We announce "monitor_just_ran" every time the device checks are run, but - # only if the DB has been checked inbetween. - my $db_monitor_ran = 0; - my $db_monitor; $db_monitor = sub { - $self->parent_ping; - $self->cache_refresh; - $db_monitor_ran++; + $self->still_alive; + + # reschedule immediately if we were blocked by main_monitor. 
+ # setting refresh_pending will call cache_refresh again + if (!$self->cache_refresh) { + $self->{refresh_pending} ||= Time::HiRes::time(); + } + + # always reschedule in 4 seconds, regardless Danga::Socket->AddTimer(4, $db_monitor); }; @@ -146,17 +220,14 @@ sub work { my $main_monitor; $main_monitor = sub { - $self->parent_ping; + $self->{parent}->ping; $self->usage_refresh; - if ($db_monitor_ran) { - $self->send_to_parent(":monitor_just_ran"); - $db_monitor_ran = 0; - } Danga::Socket->AddTimer(2.5, $main_monitor); }; - $main_monitor->(); - Danga::Socket->AddOtherFds($self->psock_fd, sub{ $self->read_from_parent }); + $self->parent_ping; # ensure we get the initial DB state back + $self->{parent} = MogileFS::Connection::Parent->new($self); + Danga::Socket->AddTimer(0, $main_monitor); Danga::Socket->EventLoop; } @@ -164,9 +235,14 @@ sub process_line { my MogileFS::Worker::Monitor $self = shift; my $lineref = shift; if ($$lineref =~ /^:refresh_monitor$/) { - $self->cache_refresh; - $self->usage_refresh; - $self->send_to_parent(":monitor_just_ran"); + if ($self->cache_refresh) { + $self->usage_refresh; + } else { + $self->{refresh_pending} ||= Time::HiRes::time(); + } + # try to stop processing further refresh_monitor requests + # if we're acting on a manual refresh + $self->{parent}->watch_read(0); return 1; } return 0; @@ -306,55 +382,10 @@ sub grab_all_data { return \%ret; } -sub ua { - my $self = shift; - return $self->{ua} ||= LWP::UserAgent->new( - timeout => MogileFS::Config->config('conn_timeout') || 2, - keep_alive => 20, - ); -} - -sub check_device { - my ($self, $dev, $have_dbh, $updateable_devices) = @_; - +# returns true on success, false on failure +sub check_usage_response { + my ($self, $dev, $response) = @_; my $devid = $dev->id; - my $host = $dev->host; - - my $port = $host->http_port; - my $get_port = $host->http_get_port; # || $port; - my $hostip = $host->ip; - my $url = $dev->usage_url; - - $self->{seen_hosts}{$hostip} = 1; - - # now try to get 
the data with a short timeout - my $timeout = MogileFS::Config->config('conn_timeout') || 2; - my $start_time = Time::HiRes::time(); - - my $ua = $self->ua; - my $response = $ua->get($url); - my $res_time = Time::HiRes::time(); - - unless ($response->is_success) { - my $failed_after = $res_time - $start_time; - if ($failed_after < 0.5) { - $self->state_event('device', $dev->id, {observed_state => 'unreachable'}) - if (!$dev->observed_unreachable); - error("Port $get_port not listening on $hostip ($url)? Error was: " . $response->status_line); - } else { - $failed_after = sprintf("%.02f", $failed_after); - $self->state_event('host', $dev->hostid, {observed_state => 'unreachable'}) - if (!$host->observed_unreachable); - $self->{skip_host}{$dev->hostid} = 1; - error("Timeout contacting $hostip dev $devid ($url): took $failed_after seconds out of $timeout allowed"); - } - return; - } - - # at this point we can reach the host - $self->state_event('host', $dev->hostid, {observed_state => 'reachable'}) - if (!$host->observed_reachable); - $self->{iow}->restart_monitoring_if_needed($hostip); my %stats; my $data = $response->content; @@ -369,93 +400,242 @@ sub check_device { $total = "" unless defined $total; my $clen = length($data || ""); error("dev$devid reports used = $used, total = $total, content-length: $clen, error?"); - return; + return 0; } + my $rstate = $self->{refresh_state}->{$devid}; + ($rstate->{used}, $rstate->{total}) = ($used, $total); + # only update database every ~15 seconds per device - my $now = time(); - if ($have_dbh && $updateable_devices) { - my $devrow = $updateable_devices->{$devid}; + if ($self->{updateable_devices}) { + my $devrow = $self->{updateable_devices}->{$devid}; my $last = ($devrow && $devrow->{mb_asof}) ? 
$devrow->{mb_asof} : 0; - if ($last + UPDATE_DB_EVERY < $now) { + if ($last + UPDATE_DB_EVERY < time()) { Mgd::get_store()->update_device_usage(mb_total => int($total / 1024), mb_used => int($used / 1024), devid => $devid); } } + return 1; +} + +sub dev_debug { + my ($self, $dev, $writable) = @_; + return unless $Mgd::DEBUG >= 1; + my $devid = $dev->id; + my $rstate = $self->{refresh_state}->{$devid}; + my ($used, $total) = ($rstate->{used}, $rstate->{total}); + + debug("dev$devid: used = $used, total = $total, writeable = $writable"); +} + +sub check_write { + my ($self, $dev) = @_; + my $rstate = $self->{refresh_state}->{$dev->id}; + my $test_write = $rstate->{test_write}; + + if (!$test_write || $test_write->{tries} > 0) { + # this was "$$-$now" before, but we don't yet have a cleaner in + # mogstored for these files + my $num = int(rand 100); + $test_write = $rstate->{test_write} ||= {}; + $test_write->{path} = "/dev${\$dev->id}/test-write/test-write-$num"; + $test_write->{content} = "time=" . time . 
" rand=$num"; + $test_write->{tries} ||= 2; + } + $test_write->{tries}--; + + my $opts = { content => $test_write->{content} }; + $dev->host->http("PUT", $test_write->{path}, $opts, sub { + my ($response) = @_; + $self->on_check_write_response($dev, $response); + }); +} + +# starts the lengthy device check process +sub check_device_begin { + my ($self, $dev) = @_; + $self->{refresh_state}->{$dev->id} = {}; - # next if we're not going to try this now - return if ($self->{last_test_write}{$devid} || 0) + UPDATE_DB_EVERY > $now; - $self->{last_test_write}{$devid} = $now; + $self->check_device($dev); +} + +# the lengthy device check process +sub check_device { + my ($self, $dev) = @_; + return $self->check_device_done($dev) if $self->{skip_host}{$dev->hostid}; + + my $devid = $dev->id; + my $url = $dev->usage_url; + my $host = $dev->host; + + $self->{seen_hosts}{$host->ip} = 1; + + # now try to get the data with a short timeout + my $start_time = Time::HiRes::time(); + $host->http_get("GET", $dev->usage_url, undef, sub { + my ($response) = @_; + if (!$self->on_usage_response($dev, $response, $start_time)) { + return $self->check_device_done($dev); + } + # next if we're not going to try this now + my $now = time(); + if (($self->{last_test_write}{$devid} || 0) + UPDATE_DB_EVERY > $now) { + return $self->check_device_done($dev); + } + $self->{last_test_write}{$devid} = $now; + + unless ($dev->can_delete_from) { + # we should not try to write on readonly devices because it can be + # mounted as RO. + return $self->dev_observed_readonly($dev); + } + # now we want to check if this device is writeable - unless ($dev->can_delete_from) { - # we should not try to write on readonly devices because it can be # mounted as RO. - $self->state_event('device', $devid, {observed_state => 'readable'}) - if (!$dev->observed_readable); - debug("dev$devid: used = $used, total = $total, writeable = 0"); - return; + # first, create the test-write directory. 
this will return + # immediately after the first time, as the 'create_directory' + # function caches what it's already created. + $dev->create_directory("/dev$devid/test-write"); # XXX synchronous + + return $self->check_write($dev); + }); +} + +# called on a successful PUT, ensure the data we get back is what we uploaded +sub check_reread { + my ($self, $dev) = @_; + # now let's get it back to verify; note we use the get_port to + # verify that the distinction works (if we have one) + my $test_write = $self->{refresh_state}->{$dev->id}->{test_write}; + $dev->host->http_get("GET", $test_write->{path}, undef, sub { + my ($response) = @_; + $self->on_check_reread_response($dev, $response); + }); +} + +sub on_check_reread_response { + my ($self, $dev, $response) = @_; + my $test_write = $self->{refresh_state}->{$dev->id}->{test_write}; + + # if success and the content matches, mark it writeable + if ($response->is_success) { + if ($response->content eq $test_write->{content}) { + if (!$dev->observed_writeable) { + my $event = { observed_state => 'writeable' }; + $self->state_event('device', $dev->id, $event); + } + $self->dev_debug($dev, 1); + + return $self->check_bogus_md5($dev); # onto the final check... + } + + # content didn't match due to race, retry and hope we're lucky + return $self->check_write($dev) if ($test_write->{tries} > 0); } - # now we want to check if this device is writeable - - # first, create the test-write directory. this will return - # immediately after the first time, as the 'create_directory' - # function caches what it's already created. 
- $dev->create_directory("/dev$devid/test-write"); - - my $num = int(rand 100); # this was "$$-$now" before, but we don't yet have a cleaner in mogstored for these files - my $puturl = "http://$hostip:$port/dev$devid/test-write/test-write-$num"; - my $content = "time=$now rand=$num"; - my $req = HTTP::Request->new(PUT => $puturl); - $req->content($content); - - # TODO: guard against race-conditions with double-check on failure - - # now, depending on what happens - my $resp = $ua->request($req); - if ($resp->is_success) { - # now let's get it back to verify; note we use the get_port to verify that - # the distinction works (if we have one) - my $geturl = "http://$hostip:$get_port/dev$devid/test-write/test-write-$num"; - my $testwrite = $ua->get($geturl); - - # if success and the content matches, mark it writeable - if ($testwrite->is_success && $testwrite->content eq $content) { - $self->check_bogus_md5($dev); - $self->state_event('device', $devid, {observed_state => 'writeable'}) - if (!$dev->observed_writeable); - debug("dev$devid: used = $used, total = $total, writeable = 1"); - return; + + return $self->dev_observed_readonly($dev); # it's read-only at least +} + +sub on_check_write_response { + my ($self, $dev, $response) = @_; + return $self->check_reread($dev) if $response->is_success; + return $self->dev_observed_readonly($dev); +} + +# returns true on success, false on failure +sub on_usage_response { + my ($self, $dev, $response, $start_time) = @_; + my $host = $dev->host; + my $hostip = $host->ip; + + if ($response->is_success) { + # at this point we can reach the host + if (!$host->observed_reachable) { + my $event = { observed_state => 'reachable' }; + $self->state_event('host', $dev->hostid, $event); } + $self->{iow}->restart_monitoring_if_needed($hostip); + + return $self->check_usage_response($dev, $response); } - # if we fall through to here, then we know that something is not so good, so mark it readable - # which is guaranteed given we even tested 
writeability - $self->state_event('device', $devid, {observed_state => 'readable'}) - if (!$dev->observed_readable); - debug("dev$devid: used = $used, total = $total, writeable = 0"); + my $url = $dev->usage_url; + my $failed_after = Time::HiRes::time() - $start_time; + if ($failed_after < 0.5) { + if (!$dev->observed_unreachable) { + my $event = { observed_state => 'unreachable' }; + $self->state_event('device', $dev->id, $event); + } + my $get_port = $host->http_get_port; + error("Port $get_port not listening on $hostip ($url)? Error was: " . $response->status_line); + } else { + $failed_after = sprintf("%.02f", $failed_after); + if (!$host->observed_unreachable) { + my $event = { observed_state => 'unreachable' }; + $self->state_event('host', $dev->hostid, $event); + } + $self->{skip_host}{$dev->hostid} = 1; + my $timeout = MogileFS->config("node_timeout"); + my $devid = $dev->id; + error("Timeout contacting $hostip dev $devid ($url): took $failed_after seconds out of $timeout allowed"); + } + return 0; # failure } sub check_bogus_md5 { my ($self, $dev) = @_; - my $host = $dev->host; - my $hostip = $host->ip; - my $port = $host->http_port; - my $devid = $dev->id; - my $puturl = "http://$hostip:$port/dev$devid/test-write/test-md5"; - my $req = HTTP::Request->new(PUT => $puturl); - $req->header("Content-MD5", md5_base64("!") . "=="); - $req->content("."); + my $put_path = "/dev${\$dev->id}/test-write/test-md5"; + my $opts = { + headers => { "Content-MD5" => md5_base64("!") . "==", }, + content => '.', + }; # success is bad here, it means the server doesn't understand how to # verify and reject corrupt bodies from Content-MD5 headers. # most servers /will/ succeed here :< - my $resp = $self->ua->request($req); - my $rej = $resp->is_success ? 
0 : 1; + $dev->host->http("PUT", $put_path, $opts, sub { + my ($response) = @_; + $self->on_bogus_md5_response($dev, $response); + }); +} + +sub on_bogus_md5_response { + my ($self, $dev, $response) = @_; + my $rej = $response->is_success ? 0 : 1; my $prev = $dev->reject_bad_md5; if (!defined($prev) || $prev != $rej) { - debug("dev$devid: reject_bad_md5 = $rej"); - $self->state_event('device', $devid, { reject_bad_md5 => $rej }); + debug("dev${\$dev->id}: reject_bad_md5 = $rej"); + $self->state_event('device', $dev->id, { reject_bad_md5 => $rej }); + } + return $self->check_device_done($dev); +} + +# if we fall through to here, then we know that something is not so +# good, so mark it readable which is guaranteed given we even tested +# writeability +sub dev_observed_readonly { + my ($self, $dev) = @_; + + if (!$dev->observed_readable) { + my $event = { observed_state => 'readable' }; + $self->state_event('device', $dev->id, $event); + } + $self->dev_debug($dev, 0); + return $self->check_device_done($dev); +} + +# called when all checks are done for a particular device +sub check_device_done { + my ($self, $dev) = @_; + + $self->still_alive; # Ping parent if needed so we don't time out + # given lots of devices. + delete $self->{refresh_state}->{$dev->id}; + + # if refresh_state is totally empty, we're done + if ((scalar keys %{$self->{refresh_state}}) == 0) { + $self->usage_refresh_done; } } From 333e0716fc80027d38d5323353bd9b2de2813886 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 4 Sep 2012 23:21:40 +0000 Subject: [PATCH 363/405] monitor: switch to non-blocking HTTP device checks Net::HTTP::NB is usable with Danga::Socket and may be used to make HTTP requests in parallel. The new connection pool supports persistent connection pooling similar to LWP::ConnCache. Total connection capacity is enforced to prevent out-of-FD situations on the workers. Unlike LWP::ConnCache, MogileFS::ConnectionPool is designed for use with concurrent, active connections. 
It also supports queueing (when any enforced capacity or system limits are reached) and relies on Danga::Socket for scheduling queued connections. In addition to total capacity limits, MogileFS::ConnectionPool also supports limiting concurrency on a per-destination basis to avoid potentially overloading a single destination. Currently, we limit ourselves to 20 connections from a single worker (matching the old LWP limit) and also limit ourselves to 20 connections to a single host (again matching our previous LWP behavior). --- MANIFEST | 4 + lib/MogileFS/Connection/HTTP.pm | 270 +++++++++++++++ lib/MogileFS/Connection/Poolable.pm | 201 ++++++++++++ lib/MogileFS/ConnectionPool.pm | 493 ++++++++++++++++++++++++++++ lib/MogileFS/Host.pm | 82 +++-- t/http.t | 332 +++++++++++++++++++ 6 files changed, 1346 insertions(+), 36 deletions(-) create mode 100644 lib/MogileFS/Connection/HTTP.pm create mode 100644 lib/MogileFS/Connection/Poolable.pm create mode 100644 lib/MogileFS/ConnectionPool.pm create mode 100644 t/http.t diff --git a/MANIFEST b/MANIFEST index e7ba10bc..bc8700c6 100644 --- a/MANIFEST +++ b/MANIFEST @@ -11,9 +11,12 @@ lib/MogileFS/Checksum.pm lib/MogileFS/Class.pm lib/MogileFS/Config.pm lib/MogileFS/Connection/Client.pm +lib/MogileFS/Connection/HTTP.pm lib/MogileFS/Connection/Mogstored.pm lib/MogileFS/Connection/Parent.pm +lib/MogileFS/Connection/Poolable.pm lib/MogileFS/Connection/Worker.pm +lib/MogileFS/ConnectionPool.pm lib/MogileFS/DevFID.pm lib/MogileFS/Device.pm lib/MogileFS/DeviceState.pm @@ -84,6 +87,7 @@ t/70-reaper.t t/80-job_master.t t/checksum.t t/fid-stat.t +t/http.t t/mogstored-shutdown.t t/multiple-devices-replpol.t t/multiple-hosts-replpol.t diff --git a/lib/MogileFS/Connection/HTTP.pm b/lib/MogileFS/Connection/HTTP.pm new file mode 100644 index 00000000..1c8e87e2 --- /dev/null +++ b/lib/MogileFS/Connection/HTTP.pm @@ -0,0 +1,270 @@ +package MogileFS::Connection::HTTP; +use strict; +use warnings; +use MogileFS::Connection::Poolable; +use 
HTTP::Response; +use base qw(MogileFS::Connection::Poolable); +use MogileFS::Util qw/debug/; + +use fields ( + 'read_size_hint', # bytes to read for body + 'http_response', # HTTP::Response object + 'http_req', # HTTP request ("GET $URL") + 'http_res_cb', # called on HTTP::Response (_after_ body is read) + 'http_res_body_read',# number of bytes read in the response body + 'http_res_content_cb' # filter for the response body (success-only) +); +use Net::HTTP::NB; + +sub new { + my ($self, $ip, $port) = @_; + my %opts = ( Host => "$ip:$port", Blocking => 0, KeepAlive => 300 ); + my $sock = Net::HTTP::NB->new(%opts) or return; + + $self = fields::new($self) unless ref $self; + $self->SUPER::new($sock, $ip, $port); # MogileFS::Connection::Poolable->new + + return $self; +} + +# starts an HTTP request, returns immediately and relies on Danga::Socket +# to schedule the run the callback. +sub start { + my ($self, $method, $path, $opts, $http_res_cb) = @_; + $opts ||= {}; + + my $err = delete $self->{mfs_err}; + return $self->err_response($err, $http_res_cb) if $err; + + $self->{http_res_cb} = $http_res_cb; + $self->{http_res_content_cb} = $opts->{content_cb}; + $self->{read_size_hint} = $opts->{read_size_hint} || 4096; + + my $h = $opts->{headers} || {}; + $h->{'User-Agent'} = ref($self) . "/$MogileFS::Server::VERSION"; + my $content = $opts->{content}; + if (defined $content) { + # Net::HTTP::NB->format_request will set Content-Length for us + $h->{'Content-Type'} = 'application/octet-stream' + } else { + $content = ""; + } + + # support full URLs for LWP compatibility + # some HTTP daemons don't support Absolute-URIs, so we only give + # them the HTTP/1.0-compatible path + if ($path =~ m{\Ahttps?://[^/]+(/.*)\z}) { + $path = $1; + } + + $self->set_timeout("node_timeout"); + + # Force HTTP/1.0 to avoid potential chunked responses and force server + # to set Content-Length: instead. 
In practice, we'll never get chunked + # responses anyways as all known DAV servers will set Content-Length + # for static files... + $self->sock->http_version($method eq "GET" ? "1.0" : "1.1"); + $h->{Connection} = "keep-alive"; + + # format the request here since it sets the reader up to read + my $req = $self->sock->format_request($method, $path, %$h, $content); + $self->{http_req} = "$method http://" . $self->key . $path; + + # we'll start watching for writes here since it's unlikely the + # 3-way handshake for new TCP connections is done at this point + $self->write($req); + + # start reading once we're done writing + $self->write(sub { + # we're connected after writing $req is successful, so + # change the timeout and wait for readability + $self->set_timeout("node_timeout"); + $self->watch_read(1); + }); +} + +# called by Danga::Socket upon readability +sub event_read { + my ($self) = @_; + + my $content_cb = $self->{http_res_content_cb}; + my Net::HTTP::NB $sock = $self->sock; + my $res = $self->{http_response}; + + # read and cache HTTP response headers + unless ($res) { + my ($code, $mess, @headers) = eval { $sock->read_response_headers }; + + # wait for readability on EAGAIN + unless (defined $code) { + my $err = $@; + if ($err) { + $err =~ s/ at .*\z//s; # do not expose source file location + $err =~ s/\r?\n/\\n/g; # just in case + return $self->err("read_response_headers: $err"); + } + + # assume EAGAIN, though $! gets clobbered by Net::HTTP::* + return; + } + + # hold onto response object until the response body is processed + $res = HTTP::Response->new($code, $mess, \@headers, ""); + $res->protocol("HTTP/" . $sock->peer_http_version); + $self->{http_response} = $res; + $self->{http_res_body_read} = $content_cb ? 0 : undef; + } + + my $body_read = sub { + $content_cb ? 
$self->{http_res_body_read} : length($res->content); + }; + + # continue reading the response body if we have a header + my $rsize = $self->{read_size_hint}; + my $buf; + + my $clen = $res->header("Content-Length"); + while (1) { + my $n = $sock->read_entity_body($buf, $rsize); + if (!defined $n) { + if ($!{EAGAIN}) { + # workaround a bug in Net::HTTP::NB + # ref: https://rt.cpan.org/Ticket/Display.html?id=78233 + if (defined($clen) && $clen == $body_read->()) { + return $self->_http_done; + } + + # reset the timeout if we got any body bytes + $self->set_timeout("node_timeout"); + return; + } + next if $!{EINTR}; + return $self->err("read_entity_body: $!"); + } + if ($n == 0) { + # EOF, call the response header callback + return $self->_http_done; + } + if ($n > 0) { + if ($content_cb && $res->is_success) { + $self->{http_res_body_read} += length($buf); + + # filter the buffer through content_cb, no buffering. + # This will be used by tracker-side checksumming + # replicate does NOT use this code path for performance + # reasons (tracker-side checksumming is already a slow path, + # so there's little point in optimizing). + # $buf may be empty on EOF (n == 0) + $content_cb->($buf, $self, $res); + + if (defined($clen) && $clen == $body_read->()) { + return $self->_http_done; + } + } else { + # append to existing buffer, this is only used for + # PUT/DELETE/HEAD and small GET responses (monitor) + $res->content($res->content . $buf); + } + # continue looping until EAGAIN or EOF (n == 0) + } + } +} + +# this does cleanup as an extra paranoid step to prevent circular refs +sub close { + my ($self, $close_reason) = @_; + + delete $self->{http_res_cb}; + delete $self->{http_res_content_cb}; + + $self->SUPER::close($close_reason); # MogileFS::Connection::Poolable->close +} + +# This is only called on a socket-level error (e.g. 
disconnect, timeout) +# bad server responses (500, 403) do not trigger this +sub err { + my ($self, $reason) = @_; + + # Fake an HTTP response like LWP does on errors. + # delete prevents http_res_cb from being invoked twice, as event_read + # will delete http_res_cb on success, too + my $http_res_cb = delete $self->{http_res_cb}; + + # don't retry if we already got a response header nor if we got a timeout + if ($reason !~ /timeout/ && $self->retryable && $http_res_cb && !$self->{http_response}) { + # do not call inflight_expire here, since we need inflight_cb + # for retrying + + $self->close(":retry"); # trigger a retry in MogileFS::ConnectionPool + } else { + # ensure we don't call new_err on close() + $self->inflight_expire; + + # free the FD before invoking the callback + $self->close($reason); + $self->err_response($reason, $http_res_cb) if $http_res_cb; + } +} + +# Fakes an HTTP response like LWP does on errors. +sub err_response { + my ($self, $err, $http_res_cb) = @_; + + my $res = HTTP::Response->new(500, $err); + $err ||= "(unspecifed error)"; + my $req = $self->{http_req} || "no HTTP request made"; + Mgd::error("$err: $req"); + $res->header("X-MFS-Error", $err); + $res->protocol("HTTP/1.0"); + $http_res_cb->($res); +} + +# returns true if the HTTP connection is persistent/reusable, false if not. +sub _http_persistent { + my ($self, $res) = @_; + + # determine if this connection is reusable: + my $connection = $res->header("Connection"); + my $persist; + + # Connection: header takes precedence over protocol version + if ($connection) { + if ($connection =~ /\bkeep-alive\b/i) { + $persist = 1; + } elsif ($connection =~ /\bclose\b/i) { + $persist = 0; + } + + # if we can't make sense of the Connection: header, fall through + # and decided based on protocol version + } + + # HTTP/1.1 is persistent-by-default, HTTP/1.0 is not. + # Will there be HTTP/1.2? 
+ $persist = $res->protocol eq "HTTP/1.1" unless defined $persist; + + # we're not persistent if the pool is full, either + return ($persist && $self->persist); +} + +# Called on successfully read HTTP response (it could be a server-side +# error (404,403,500...), but not a socket error between client<->server). +sub _http_done { + my ($self) = @_; + + # delete ensures we only fire the callback once + my $http_res_cb = delete $self->{http_res_cb}; + my $res = delete $self->{http_response}; + delete $self->{http_req}; + + # ensure we don't call new_err on eventual close() + $self->inflight_expire; + + # free up the FD if possible + $self->close('http_close') unless $self->_http_persistent($res); + + # finally, invoke the user-supplied callback + $http_res_cb->($res); +} + +1; diff --git a/lib/MogileFS/Connection/Poolable.pm b/lib/MogileFS/Connection/Poolable.pm new file mode 100644 index 00000000..29e7f01d --- /dev/null +++ b/lib/MogileFS/Connection/Poolable.pm @@ -0,0 +1,201 @@ +# private base class for poolable HTTP/Mogstored sidechannel connections +# This is currently only used by HTTP, but is intended for Mogstored +# connections, too. 
+package MogileFS::Connection::Poolable; +use strict; +use warnings; +use Danga::Socket; +use base qw(Danga::Socket); +use fields ( + 'mfs_pool', # owner of the connection (MogileFS::ConnectionPool) + 'mfs_hostport', # [ ip, port ] + 'mfs_expire', # Danga::Socket::Timer object + 'mfs_expire_cb', # Danga::Socket::Timer callback + 'mfs_requests', # number of requests made on this object + 'mfs_err', # used to propagate an error to start() +); +use Socket qw(SO_KEEPALIVE); +use Time::HiRes; + +# subclasses (MogileFS::Connection::{HTTP,Mogstored}) must call this sub +sub new { + my ($self, $sock, $ip, $port) = @_; + $self->SUPER::new($sock); # Danga::Socket->new + + # connection may not be established, yet + # so Danga::Socket->peer_addr_string can't be used here + $self->{mfs_hostport} = [ $ip, $port ]; + $self->{mfs_requests} = 0; + + return $self; +} + +# used by ConnectionPool for tracking per-hostport connection counts +sub key { join(':', @{$_[0]->{mfs_hostport}}); } + +# backwards compatibility +sub host_port { $_[0]->key; } + +sub ip_port { @{$_[0]->{mfs_hostport}}; } + +sub fd { fileno($_[0]->sock); } + +# marks a connection as idle, call this before putting it in a connection +# pool for eventual reuse. +sub mark_idle { + my ($self) = @_; + + $self->watch_read(0); + + # set the keepalive flag the first time we're idle + $self->sock->sockopt(SO_KEEPALIVE, 1) if $self->{mfs_requests} == 0; + + $self->{mfs_requests}++; +} + +# the request running on this connection is retryable if this socket +# has ever been marked idle. The connection pool can never be 100% +# reliable for detecting dead sockets, and all HTTP requests made by +# MogileFS are idempotent. 
+sub retryable { $_[0]->{mfs_requests} > 0 } + +# Sets (or updates) the timeout of the connection +# timeout_key is "node_timeout" or "conn_timeout" +# clears the current timeout if timeout_key is undef +sub set_timeout { + my ($self, $timeout_key) = @_; + my $mfs_pool = $self->{mfs_pool}; + + if ($timeout_key) { + my $timeout; + + if ($timeout_key =~ /[a-z_]/) { + $timeout = MogileFS->config($timeout_key) || 2; + } else { + $timeout = $timeout_key; + $timeout_key = "timeout"; + } + + my $t0 = Time::HiRes::time(); + $self->{mfs_expire} = $t0 + $timeout; + $self->{mfs_expire_cb} = sub { + my ($now) = @_; + my $elapsed = $now - $t0; + + # for HTTP, this will fake an HTTP error response like LWP does + $self->err("$timeout_key: $timeout (elapsed: $elapsed)"); + }; + $mfs_pool->register_timeout($self, $timeout) if $mfs_pool; + } else { + $self->{mfs_expire} = $self->{mfs_expire_cb} = undef; + $mfs_pool->register_timeout($self, undef) if $mfs_pool; + } +} + +# returns the expiry time of the connection +sub expiry { $_[0]->{mfs_expire} } + +# runs expiry callback and returns true if time is up, +# returns false if there is time remaining +sub expired { + my ($self, $now) = @_; + my $expire = $self->{mfs_expire} or return 0; + $now ||= Time::HiRes::time(); + + if ($now >= $expire) { + my $expire_cb = delete $self->{mfs_expire_cb}; + if ($expire_cb && $self->sock) { + $expire_cb->($now); + } + return 1; + } + return 0; +} + +# may be overriden in subclass, called only on errors +# The HTTP version of this will fake an HTTP response for LWP compatibility +sub err { + my ($self, $close_reason) = @_; + + $self->inflight_expire; # ensure we don't call new_err on eventual close() + + if ($close_reason =~ /\A:event_(?:hup|err)\z/) { + # there's a chance this can be invoked while inflight, + # conn_drop will handle this case appropriately + $self->{mfs_pool}->conn_drop($self, $close_reason) if $self->{mfs_pool}; + } else { + $self->close($close_reason); + } +} + +# sets the pool 
this connection belongs to, only call from ConnectionPool +sub set_pool { + my ($self, $pool) = @_; + + $self->{mfs_pool} = $pool; +} + +# closes a connection, and may reschedule the inflight callback if +# close_reason is ":retry" +sub close { + my ($self, $close_reason) = @_; + + delete $self->{mfs_expire_cb}; # avoid circular ref + + my $mfs_pool = delete $self->{mfs_pool}; # avoid circular ref + my $inflight_cb; + + if ($mfs_pool) { + $mfs_pool->schedule_queued; + $inflight_cb = $mfs_pool->conn_close_prepare($self, $close_reason); + } + $self->SUPER::close($close_reason); # Danga::Socket->close + + if ($inflight_cb && $close_reason) { + if ($close_reason eq ":retry") { + my ($ip, $port) = $self->ip_port; + + $mfs_pool->enqueue($ip, $port, $inflight_cb); + } else { + # Danga::Socket-scheduled write()s which fail with ECONNREFUSED, + # EPIPE, or "write_error" after an initial (non-blocking) + # connect() + $mfs_pool->on_next_tick(sub { + ref($self)->new_err($close_reason || "error", $inflight_cb); + }); + } + } +} + +# Marks a connection as no-longer inflight. Calling this prevents retries. +sub inflight_expire { + my ($self) = @_; + my $mfs_pool = $self->{mfs_pool}; + die "BUG: expiring without MogileFS::ConnectionPool\n" unless $mfs_pool; + $mfs_pool->inflight_cb_expire($self); +} + +# Danga::Socket callbacks +sub event_hup { $_[0]->err(':event_hup'); } +sub event_err { $_[0]->err(':event_err'); } + +# called when we couldn't create a socket, but need to create an object +# anyways for errors (creating fake, LWP-style error responses) +sub new_err { + my ($class, $err, $start_cb) = @_; + my $self = fields::new($class); + $self->{mfs_err} = $err; + # on socket errors + $start_cb->($self); +} + +# returns this connection back to its associated pool. +# Returns false if not successful (pool is full) +sub persist { + my ($self) = @_; + my $mfs_pool = $self->{mfs_pool}; + + return $mfs_pool ? 
$mfs_pool->conn_persist($self) : 0; +} + +1; diff --git a/lib/MogileFS/ConnectionPool.pm b/lib/MogileFS/ConnectionPool.pm new file mode 100644 index 00000000..2b9a59d9 --- /dev/null +++ b/lib/MogileFS/ConnectionPool.pm @@ -0,0 +1,493 @@ +# a connection pool class with queueing. +# (something doesn't sound quite right with that...) +# This requires Danga::Socket to drive, but may also function without it +# via conn_get/conn_put. +package MogileFS::ConnectionPool; +use strict; +use warnings; +use Carp qw(croak confess); +use Time::HiRes; + +use constant NEVER => (0xffffffff << 32) | 0xffffffff; # portable version :P + +sub new { + my ($class, $conn_class, $opts) = @_; + + $opts ||= {}; + my $self = bless { + fdmap => {}, # { fd -> conn } + idle => {}, # ip:port -> [ MogileFS::Connection::Poolable, ... ] + queue => [], # [ [ ip, port, callback ], ... ] + timer => undef, # Danga::Socket::Timer object + timeouts => {}, # { fd -> conn } + inflight => {}, # ip:port -> { fd -> callback } + total_inflight => 0, # number of inflight connections + dest_capacity => $opts->{dest_capacity}, + total_capacity => $opts->{total_capacity}, + class => $conn_class, + scheduled => 0, # set if we'll start tasks on next tick + on_next_tick => [], + next_expiry => NEVER, + }, $class; + + # total_capacity=20 matches what we used with LWP + $self->{total_capacity} ||= 20; + + # allow users to specify per-destination capacity limits + $self->{dest_capacity} ||= $self->{total_capacity}; + + return $self; +} + +# retrieves an idle connection for the [IP, port] pair +sub _conn_idle_get { + my ($self, $ip, $port) = @_; + + my $key = "$ip:$port"; + my $idle = $self->{idle}->{$key} or return; + + # the Danga::Socket event loop may detect hangups and close sockets, + # However not all MFS workers run this event loop, so we need to + # validate the connection when retrieving a connection from the pool + while (my $conn = pop @$idle) { + # make sure the socket is valid: + + # due to response callback 
ordering, we actually place connections + # in the pool before invoking the user-supplied response callback + # (to allow connections to get reused ASAP) + my $sock = $conn->sock or next; + + # hope this returns EAGAIN, not using OO->sysread here since + # Net::HTTP::NB overrides that and we _want_ to hit EAGAIN here + my $r = sysread($sock, my $byte, 1); + + # good, connection is possibly still alive if we got EAGAIN + return $conn if (!defined $r && $!{EAGAIN}); + + my $err = $!; + if (defined $r) { + if ($r == 0) { + # a common case and to be expected + $err = "server dropped idle connection"; + } else { + # this is a bug either on our side or the HTTP server + Mgd::error("Bug: unexpected got $r bytes from idle conn to ". $conn->host_port. ") (byte=$byte)"); + } + } + + # connection is bad, close the socket and move onto the + # next idle connection if there is one. + $conn->close($err); + } + + return; +} + +# creates a new connection if under capacity +# returns undef if we're at capacity (or on EMFILE/ENFILE) +sub _conn_new_maybe { + my ($self, $ip, $port) = @_; + my $key = "$ip:$port"; + + # we only call this sub if we don't have idle connections, so + # we don't check {idle} here + + # make sure we're not already at capacity for this destination + my $nr_inflight = scalar keys %{$self->{inflight}->{$key} ||= {}}; + return if ($nr_inflight >= $self->{dest_capacity}); + + # see how we're doing with regard to total capacity: + if ($self->_total_connections >= $self->{total_capacity}) { + # see if we have idle connections for other pools to kill + if ($self->{total_inflight} < $self->{total_capacity}) { + # we have idle connections to other destinations, drop one of those + $self->_conn_drop_idle; + # fall through to creating a new connection + } else { + # we're at total capacity for the entire pool + return; + } + } + + # we're hopefully under capacity if we got here, create a new connection + $self->_conn_new($ip, $port); +} + +# creates new connection and 
registers it in our fdmap +# returns undef if resources (FDs, buffers) aren't available +sub _conn_new { + my ($self, $ip, $port) = @_; + + # calls MogileFS::Connection::{HTTP,Mogstored}->new: + my $conn = $self->{class}->new($ip, $port); + if ($conn) { + # register the connection + $self->{fdmap}->{$conn->fd} = $conn; + $conn->set_pool($self); + + return $conn; + } else { + # EMFILE/ENFILE should never happen as the capacity for this + # pool is far under the system defaults. Just give up on + # EMFILE/ENFILE like any other error. + my $mfs_err = $!; + Mgd::log('err', "failed to create socket to $ip:$port ($mfs_err)"); + + return $mfs_err; + } +} + +# retrieves a connection, may return undef if at capacity +sub _conn_get { + my ($self, $ip, $port) = @_; + + # if we have idle connections, always use them first + $self->_conn_idle_get($ip, $port) || $self->_conn_new_maybe($ip, $port); +} + +# Pulls a connection out of the pool for synchronous use. +# This may create a new connection (independent of pool limits). +# The connection returned by this is _blocking_. This is currently +# only used by replicate. +sub conn_get { + my ($self, $ip, $port) = @_; + my $conn = $self->_conn_idle_get($ip, $port); + + if ($conn) { + # in case the connection never comes back, let refcounting close() it: + delete $self->{fdmap}->{$conn->fd}; + } else { + $conn = $self->_conn_new($ip, $port); + unless (ref $conn) { + $! = $conn; # $conn is an error message :< + return; + } + delete $self->{fdmap}->{$conn->fd}; + my $timeout = MogileFS->config("node_timeout"); + MogileFS::Util::wait_for_writeability($conn->fd, $timeout) or return; + } + + return $conn; +} + +# retrieves a connection from the connection pool and executes +# inflight_cb on it. If the pool is at capacity, this will queue the task. 
+# This relies on Danga::Socket->EventLoop +sub start { + my ($self, $ip, $port, $inflight_cb) = @_; + + my $conn = $self->_conn_get($ip, $port); + if ($conn) { + $self->_conn_run($conn, $inflight_cb); + } else { # we're too busy right now, queue up + $self->enqueue($ip, $port, $inflight_cb); + } +} + +# returns the total number of connections we have +sub _total_connections { + my ($self) = @_; + return scalar keys %{$self->{fdmap}}; +} + +# marks a connection as no longer inflight, returns the inflight +# callback if the connection was active, undef if not +sub inflight_cb_expire { + my ($self, $conn) = @_; + my $inflight_cb = delete $self->{inflight}->{$conn->key}->{$conn->fd}; + $self->{total_inflight}-- if $inflight_cb; + + return $inflight_cb; +} + +# schedules the event loop to dequeue and run a task on the next +# tick of the Danga::Socket event loop. Call this +# 1) whenever a task is enqueued +# 2) whenever a task is complete +sub schedule_queued { + my ($self) = @_; + + # AddTimer(0) to avoid potential stack overflow + $self->{scheduled} ||= Danga::Socket->AddTimer(0, sub { + $self->{scheduled} = undef; + my $queue = $self->{queue}; + + my $total_capacity = $self->{total_capacity}; + my $i = 0; + + while ($self->{total_inflight} < $total_capacity + && $i <= (scalar(@$queue) - 1)) { + my ($ip, $port, $cb) = @{$queue->[$i]}; + + my $conn = $self->_conn_get($ip, $port); + if ($conn) { + splice(@$queue, $i, 1); # remove from queue + $self->_conn_run($conn, $cb); + } else { + # this queue object cannot be dequeued, skip it for now + $i++; + } + } + }); +} + +# Call this when done using an (inflight) connection +# This possibly places a connection in the connection pool. +# This will close the connection of the pool is already at capacity. 
+# This will also start the next queued callback, or retry if needed +sub conn_persist { + my ($self, $conn) = @_; + + # schedule the next request if we're done with any connection + $self->schedule_queued; + $self->conn_put($conn); +} + +# The opposite of conn_get, this returns a connection retrieved with conn_get +# back to the connection pool, making it available for future use. Dead +# connections are not stored. +# This is currently only used by replicate. +sub conn_put { + my ($self, $conn) = @_; + + my $key = $conn->key; + # we do not store dead connections + my $peer_addr = $conn->peer_addr_string; + + if ($peer_addr) { + # connection is still alive, respect capacity limits + my $idle = $self->{idle}->{$key} ||= []; + + # register it in the fdmap just in case: + $self->{fdmap}->{$conn->fd} = $conn; + + if ($self->_dest_total($conn) < $self->{dest_capacity}) { + $conn->mark_idle; + push @$idle, $conn; # yay, connection is reusable + $conn->set_timeout(undef); # clear timeout + return 1; # success + } + } + + # we have too many connections or the socket is dead, caller + # should close after returning from this function. 
+ return 0; +} + +# enqueues a request (inflight_cb) and schedules it to run ASAP +# This must be used with Danga::Socket->EventLoop +sub enqueue { + my ($self, $ip, $port, $inflight_cb) = @_; + + push @{$self->{queue}}, [ $ip, $port, $inflight_cb ]; + + # we have something in the queue, make sure it's run soon + $self->schedule_queued; +} + +# returns the total connections to the host of a given connection +sub _dest_total { + my ($self, $conn) = @_; + my $key = $conn->key; + my $inflight = scalar keys %{$self->{inflight}->{$key}}; + my $idle = scalar @{$self->{idle}->{$key}}; + return $idle + $inflight; +} + +# only call this from the event_hup/event_err callbacks used by Danga::Socket +sub conn_drop { + my ($self, $conn, $close_reason) = @_; + + my $fd = $conn->fd; + my $key = $conn->key; + + # event_read must handle errors anyways, so hand off + # error handling to the event_read callback if inflight. + return $conn->event_read if $self->{inflight}->{$key}->{$fd}; + + # we get here if and only if the socket is idle, we can drop it ourselves + # splice out the socket we're closing from the idle pool + my $idle = $self->{idle}->{$key}; + foreach my $i (0 .. (scalar(@$idle) - 1)) { + my $old = $idle->[$i]; + if ($old->sock) { + if ($old->fd == $fd) { + splice(@$idle, $i, 1); + $conn->close($close_reason); + return; + } + } else { + # some connections may have expired but not been spliced out, yet + # splice it out here since we're iterating anyways + splice(@$idle, $i, 1); + } + } +} + +# unregisters and prepares connection to be closed +# Returns the inflight callback if there was one +sub conn_close_prepare { + my ($self, $conn, $close_reason) = @_; + + if ($conn->sock) { + my $fd = $conn->fd; + + my $valid = delete $self->{fdmap}->{$fd}; + delete $self->{timeouts}->{$fd}; + + my $inflight_cb = $self->inflight_cb_expire($conn); + + # $valid may be undef in replicate worker which removes connections + # from fdmap. 
However, valid==undef connections should never have + # an inflight_cb + if ($inflight_cb && !$valid) { + croak("BUG: dropping unregistered conn with callback: $conn"); + } + return $inflight_cb; + } +} + +# schedules cb to run on the next tick of the event loop, +# (immediately after this tick runs) +sub on_next_tick { + my ($self, $cb) = @_; + my $on_next_tick = $self->{on_next_tick}; + push @$on_next_tick, $cb; + + if (scalar(@$on_next_tick) == 1) { + Danga::Socket->AddTimer(0, sub { + # prevent scheduled callbacks from being called on _this_ tick + $on_next_tick = $self->{on_next_tick}; + $self->{on_next_tick} = []; + + while (my $sub = shift @$on_next_tick) { + $sub->() + } + }); + } +} + +# marks a connection inflight and invokes cb on it +# $conn may be a error string, in which case we'll invoke the user-supplied +# callback with a mock error (this mimics how LWP fakes an HTTP response +# even if the socket could not be created/connected) +sub _conn_run { + my ($self, $conn, $cb) = @_; + + if (ref $conn) { + my $inflight = $self->{inflight}->{$conn->key} ||= {}; + $inflight->{$conn->fd} = $cb; # stash callback for retrying + $self->{total_inflight}++; + $cb->($conn); + } else { + # fake an error message on the response callback + $self->on_next_tick(sub { + # fatal error creating the socket, do not queue + my $mfs_err = $conn; + $self->{class}->new_err($mfs_err, $cb); + + # onto the next request + $self->schedule_queued; + }); + } +} + +# drops an idle connection from the idle connection pool (so we can open +# another socket without incurring out-of-FD errors) +# Only call when you're certain there's a connection to drop +# XXX This is O(destinations), unfortunately +sub _conn_drop_idle { + my ($self) = @_; + my $idle = $self->{idle}; + + # using "each" on the hash since it preserves the internal iterator + # of the hash across invocations of this sub. This should preserve + # the balance of idle connections in a big pool with many hosts. 
+ # Thus we loop twice to ensure we scan the entire idle connection + # pool if needed + foreach (1..2) { + while (my (undef, $val) = each %$idle) { + my $conn = shift @$val or next; + + $conn->close("idle_expire") if $conn->sock; + return; + } + } + + confess("BUG: unable to drop an idle connection"); +} + +# checks for expired connections, this can be expensive if there +# are many concurrent connections waiting on timeouts, but still +# better than having AddTimer create a Danga::Socket::Timer object +# every time a timeout is reset. +sub check_timeouts { + my ($self) = @_; + my $timeouts = $self->{timeouts}; + my @fds = keys %$timeouts; + my $next_expiry = NEVER; + my $now = Time::HiRes::time(); + + # this is O(n) where n is concurrent connections + foreach my $fd (@fds) { + my $conn = $timeouts->{$fd}; + if ($conn->expired($now)) { + delete $timeouts->{$fd}; + } else { + # look for the next timeout + my $expiry = $conn->expiry; + if ($expiry) { + $next_expiry = $expiry if $expiry < $next_expiry; + } else { + # just in case, this may not happen... + delete $timeouts->{$fd}; + } + } + } + + # schedule the wakeup for the next timeout + if ($next_expiry == NEVER) { + $self->{timer} = undef; + } else { + my $timeout = $next_expiry - $now; + $timeout = 0 if $timeout <= 0; + $self->{timer} = Danga::Socket->AddTimer($timeout, sub { + $self->check_timeouts; + }); + } + $self->{next_expiry} = $next_expiry; +} + +# registers a timeout for a given connection, each connection may only +# have one pending timeout. Timeout may be undef to cancel the current +# timeout. 
+sub register_timeout { + my ($self, $conn, $timeout) = @_; + + if ($conn->sock) { + my $fd = $conn->fd; + if ($timeout) { + $self->{timeouts}->{$fd} = $conn; + my $next_expiry = $self->{next_expiry}; + my $old_timer = $self->{timer}; + my $expiry = $timeout + Time::HiRes::time(); + + if (!$old_timer || $expiry < $next_expiry) { + $self->{next_expiry} = $expiry; + $self->{timer} = Danga::Socket->AddTimer($timeout, sub { + $self->check_timeouts; + }); + $old_timer->cancel if $old_timer; + } + } else { + delete $self->{timeouts}->{$fd}; + } + } elsif ($timeout) { # this may never happen... + # no FD, so we must allocate a new Danga::Socket::Timer object + # add 1msec to avoid FP rounding problems leading to missed + # expiration when calling conn->expired + Danga::Socket->AddTimer($timeout + 0.001, sub { $conn->expired }); + } +} + +1; diff --git a/lib/MogileFS/Host.pm b/lib/MogileFS/Host.pm index 6b0c5601..11e75453 100644 --- a/lib/MogileFS/Host.pm +++ b/lib/MogileFS/Host.pm @@ -5,15 +5,9 @@ use MogileFS::Util qw(throw); use Net::Netmask; use Carp qw(croak); use MogileFS::Connection::Mogstored; - -# temporary... 
-use LWP::UserAgent; -sub ua { - LWP::UserAgent->new( - timeout => MogileFS::Config->config('conn_timeout') || 2, - keep_alive => 20, - ); -} +use MogileFS::Connection::HTTP; +use MogileFS::ConnectionPool; +our $http_pool; =head1 @@ -109,43 +103,59 @@ sub sidechannel_port { MogileFS->config("mogstored_stream_port"); } -sub _http_req { - my ($self, $method, $uri, $opts, $cb) = @_; +# starts an HTTP request on the given $port with $method to $path +# Calls cb with an HTTP::Response object when done +sub _http_conn { + my ($self, $port, $method, $path, $opts, $cb) = @_; + _init_pools(); - $opts ||= {}; - my $h = $opts->{headers} || {}; - my $req = HTTP::Request->new($method, $uri, [ %$h ]); - $req->content($opts->{content}) if exists $opts->{content}; - Danga::Socket->AddTimer(0, sub { - my $response = $self->ua->request($req); - Danga::Socket->AddTimer(0, sub { $cb->($response) }); + $http_pool->start($opts->{ip} || $self->ip, $port, sub { + $_[0]->start($method, $path, $opts, $cb); }); } -# FIXME: make async -sub http_get { - my ($self, $method, $uri, $opts, $cb) = @_; +# Returns a ready, blocking HTTP connection +# This is only used by replicate +sub http_conn_get { + my ($self, $opts) = @_; + my $ip = $opts->{ip} || $self->ip; + my $port = $opts->{port} || $self->http_port; + + _init_pools(); + my $conn = $http_pool->conn_get($ip, $port); + $conn->sock->blocking(1); + return $conn; +} - if ($method !~ /\A(?:GET|HEAD)\z/) { - die "Bad method for HTTP get port: $method"; - } +# Returns a blocking HTTP connection back to the pool. +# This is the inverse of http_conn_get, and should be called when +# done using a connection (iff the connection is really still alive) +# (and makes it non-blocking for future use) +# This is only used by replicate. +sub http_conn_put { + my ($self, $conn) = @_; + $conn->sock->blocking(0); + $http_pool->conn_put($conn); +} - # convert path-only URL to full URL - if ($uri =~ m{\A/}) { - $uri = 'http://' . $self->ip . ':' . 
$self->http_get_port . $uri; - } - $self->_http_req($method, $uri, $opts, $cb); +sub http_get { + my ($self, $method, $path, $opts, $cb) = @_; + $opts ||= {}; + $self->_http_conn($self->http_get_port, $method, $path, $opts, $cb); } -# FIXME: make async sub http { - my ($self, $method, $uri, $opts, $cb) = @_; + my ($self, $method, $path, $opts, $cb) = @_; + $opts ||= {}; + my $port = delete $opts->{port} || $self->http_port; + $self->_http_conn($port, $method, $path, $opts, $cb); +} - # convert path-only URL to full URL - if ($uri =~ m{\A/}) { - $uri = 'http://' . $self->ip . ':' . $self->http_port . $uri; - } - $self->_http_req($method, $uri, $opts, $cb); +# FIXME - make these customizable +sub _init_pools { + return if $http_pool; + + $http_pool = MogileFS::ConnectionPool->new("MogileFS::Connection::HTTP"); } 1; diff --git a/t/http.t b/t/http.t new file mode 100644 index 00000000..04ee993a --- /dev/null +++ b/t/http.t @@ -0,0 +1,332 @@ +# this test reaches inside MogileFS::Host and MogileFS::Connection::HTTP +# internals to ensure error handling and odd corner cases are handled +# (existing tests may not exercise this in monitor) +use strict; +use warnings; +use Test::More; +use MogileFS::Server; +use MogileFS::Test; +use MogileFS::Util qw/wait_for_readability/; +use Danga::Socket; +use IO::Socket::INET; +use Socket qw(TCP_NODELAY); + +# bind a random TCP port for testing +my %lopts = ( + LocalAddr => "127.0.0.1", + LocalPort => 0, + Proto => "tcp", + ReuseAddr => 1, + Listen => 1024 +); +my $http = IO::Socket::INET->new(%lopts); +$http->sockopt(TCP_NODELAY, 1); +my $http_get = IO::Socket::INET->new(%lopts); +$http_get->sockopt(TCP_NODELAY, 1); + +my $host_args = { + hostid => 1, + hostname => 'mockhost', + hostip => $http->sockhost, + http_port => $http->sockport, + http_get_port => $http_get->sockport, +}; +my $host = MogileFS::Host->new_from_args($host_args); +MogileFS::Host->_init_pools; +my $idle_pool = $MogileFS::Host::http_pool->{idle}; 
+is("MogileFS::Host", ref($host), "host created"); +MogileFS::Config->set_config("node_timeout", 1); + +# hit the http_get_port +{ + my $resp; + Danga::Socket->SetPostLoopCallback(sub { ! defined($resp) }); + $host->http_get("GET", "/read-only", undef, sub { $resp = $_[0] }); + + server_do(sub { + my $s = $http_get->accept; + my $buf = read_one_request($s); + if ($buf =~ m{\AGET /read-only HTTP/1\.0\r\n}) { + $s->syswrite("HTTP/1.1 200\r\nContent-Length: 0\r\n\r\n"); + } + sleep 6; # wait for SIGKILL + }, + sub { + Danga::Socket->EventLoop; + ok($resp->is_success, "HTTP response is success"); + is(200, $resp->code, "got HTTP 200 response"); + my $pool = $idle_pool->{"$host->{hostip}:$host->{http_get_port}"}; + is(1, scalar @$pool, "connection placed in GET pool"); + }); + + has_nothing_inflight(); + has_nothing_queued(); +} + +# simulate a trickled response from server +{ + my $resp; + Danga::Socket->SetPostLoopCallback(sub { ! defined($resp) }); + $host->http("GET", "/trickle", undef, sub { $resp = $_[0] }); + server_do(sub { + my $s = $http->accept; + my $buf = read_one_request($s); + my $r = "HTTP/1.1 200 OK\r\nContent-Length: 100\r\n\r\n"; + if ($buf =~ /trickle/) { + foreach my $x (split(//, $r)) { + $s->syswrite($x); + sleep 0.01; + } + foreach my $i (1..100) { + $s->syswrite($i % 10); + sleep 0.1; + } + } + sleep 6; + }, + sub { + Danga::Socket->EventLoop; + ok($resp->is_success, "HTTP response is successful"); + my $expect = ""; + foreach my $i (1..100) { + $expect .= $i % 10; + } + is($expect, $resp->content, "response matches expected"); + }); + + has_nothing_inflight(); + has_nothing_queued(); +} + +# simulate a differently trickled response from server +{ + my $resp; + Danga::Socket->SetPostLoopCallback(sub { ! 
defined($resp) }); + my $body = "*" x 100; + $host->http("GET", "/trickle-head-body", undef, sub { $resp = $_[0] }); + server_do(sub { + my $s = $http->accept; + my $buf = read_one_request($s); + my $r = "HTTP/1.1 200 OK\r\nContent-Length: 100\r\n\r\n"; + if ($buf =~ /trickle-head-body/) { + $s->syswrite($r); + sleep 1; + $s->syswrite($body); + } + sleep 6; + }, + sub { + Danga::Socket->EventLoop; + ok($resp->is_success, "HTTP response is successful on trickle"); + is($resp->content, $body, "trickled response matches expected"); + }); + + has_nothing_inflight(); + has_nothing_queued(); +} + +# simulate a server that disconnected after a (very short) idle time +# despite supporting persistent conns +{ + my $resp; + Danga::Socket->SetPostLoopCallback(sub { ! defined($resp) }); + $host->http("GET", "/foo", undef, sub { $resp = $_[0] }); + my $conn; + + server_do(sub { + my $s = $http->accept; + my $buf = read_one_request($s); + if ($buf =~ m{\AGET /foo HTTP/1\.0\r\n}) { + $s->syswrite("HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n"); + } + sleep 6; # wait for SIGKILL + }, + sub { + Danga::Socket->EventLoop; + ok($resp->is_success, "HTTP response is success"); + my $pool = $idle_pool->{"$host->{hostip}:$host->{http_port}"}; + is(1, scalar @$pool, "connection placed in pool"); + $conn = $pool->[0]; + }); + + # try again, server didn't actually keep the connection alive, + $resp = undef; + Danga::Socket->SetPostLoopCallback(sub { ! 
defined($resp) }); + $host->http("GET", "/again", undef, sub { $resp = $_[0] }); + + server_do(sub { + my $s = $http->accept; + my $buf = read_one_request($s); + if ($buf =~ m{\AGET /again HTTP/1\.0\r\n}) { + $s->syswrite("HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n"); + } + sleep 6; # wait for SIGKILL + }, + sub { + Danga::Socket->EventLoop; + my $pool = $idle_pool->{"$host->{hostip}:$host->{http_port}"}; + is(1, scalar @$pool, "new connection placed in pool"); + isnt($conn, $pool->[0], "reference not reused"); + }); + + has_nothing_inflight(); + has_nothing_queued(); +} + +# simulate persistent connection reuse +{ + my $resp; + my $nr = 6; + my $conn; + + my $failsafe = Danga::Socket->AddTimer(5, sub { $resp = "FAIL TIMEOUT" }); + Danga::Socket->SetPostLoopCallback(sub { ! defined($resp) }); + + server_do(sub { + my $s = $http->accept; + my $buf; + foreach my $i (1..$nr) { + $buf = read_one_request($s); + if ($buf =~ m{\AGET /$i HTTP/1\.0\r\n}) { + $s->syswrite("HTTP/1.1 200 OK\r\nContent-Length: 1\r\n\r\n$i"); + } + } + sleep 6; # wait for SIGKILL + }, + sub { + foreach my $i (1..$nr) { + $resp = undef; + $host->http("GET", "/$i", undef, sub { $resp = $_[0] }); + Danga::Socket->EventLoop; + is(ref($resp), "HTTP::Response", "got HTTP response"); + ok($resp->is_success, "HTTP response is successful"); + is($i, $resp->content, "response matched"); + my $pool = $idle_pool->{"$host->{hostip}:$host->{http_port}"}; + is(1, scalar @$pool, "connection placed in connection pool"); + + if ($i == 1) { + $conn = $pool->[0]; + is("MogileFS::Connection::HTTP", ref($conn), "got connection"); + } else { + ok($conn == $pool->[0], "existing connection reused (#$i)"); + } + } + }); + $failsafe->cancel; + + has_nothing_inflight(); + has_nothing_queued(); +} + +# simulate a node_timeout +sub sim_node_timeout { + my ($send_header) = @_; + my $resp; + + # we need this timer (just to exist) to break out of the event loop + my $t = Danga::Socket->AddTimer(1.2, sub { fail("timer should 
not fire") }); + + my $req = "/node-time-me-out-"; + $req .= $send_header ? 1 : 0; + Danga::Socket->SetPostLoopCallback(sub { ! defined($resp) }); + $host->http("GET", $req, undef, sub { $resp = $_[0] }); + + server_do(sub { + my $s = $http->accept; + my $buf = read_one_request($s); + if ($buf =~ /node-time-me-out/) { + if ($send_header) { + $s->syswrite("HTTP/1.1 200 OK\r\nContent-Length: 1\r\n\r\n"); + } + sleep 60; # wait to trigger timeout + } else { + # nuke the connection to _NOT_ trigger timeout + $s->syswrite("HTTP/1.1 404 Not Found\r\n\r\n"); + close($s); + } + }, + sub { + Danga::Socket->EventLoop; + $t->cancel; + ok(! $resp->is_success, "HTTP response is not successful"); + like($resp->message, qr/node_timeout/, "node_timeout hit"); + my $pool = $idle_pool->{"$host->{hostip}:$host->{http_port}"}; + is(0, scalar @$pool, "connection pool is empty"); + }); + + has_nothing_inflight(); + has_nothing_queued(); +} + +sim_node_timeout(0); +sim_node_timeout(1); + +# server just drops connection +{ + my $resp; + + # we want an empty pool to avoid retries + my $pool = $idle_pool->{"$host->{hostip}:$host->{http_port}"}; + is(0, scalar @$pool, "connection pool is empty"); + + Danga::Socket->SetPostLoopCallback(sub { ! defined($resp) }); + $host->http("GET", "/drop-me", undef, sub { $resp = $_[0] }); + + server_do(sub { + my $s = $http->accept; + my $buf = read_one_request($s); + close $s if ($buf =~ /drop-me/); + sleep 6; + }, + sub { + Danga::Socket->EventLoop; + ok(! 
$resp->is_success, "HTTP response is not successful"); + my $pool = $idle_pool->{"$host->{hostip}:$host->{http_port}"}; + is(0, scalar @$pool, "connection pool is empty"); + }); + + has_nothing_inflight(); + has_nothing_queued(); +} + +done_testing(); + +sub has_nothing_inflight { + my $inflight = $MogileFS::Host::http_pool->{inflight}; + my $n = 0; + foreach my $host_port (keys %$inflight) { + $n += scalar keys %{$inflight->{$host_port}}; + } + is($MogileFS::Host::http_pool->{total_inflight}, 0, "nothing is counted to be inflight"); + is($n, 0, "nothing is really inflight"); +} + +sub has_nothing_queued { + is(scalar @{$MogileFS::Host::http_pool->{queue}}, 0, "connection pool task queue is empty"); +} + +sub server_do { + my ($child, $parent) = @_; + my $pid = fork; + fail("fork failed: $!") unless defined($pid); + + if ($pid == 0) { + $child->(); + } else { + $parent->(); + is(1, kill(9, $pid), "child killed"); + is($pid, waitpid($pid, 0), "child reaped"); + } +} + +sub read_one_request { + my $s = shift; + + my $fd = fileno($s); + wait_for_readability($fd, 5); + my $buf = ""; + do { + $s->sysread($buf, 4096, length($buf)); + } while wait_for_readability($fd, 0.1) && $buf !~ /\r\n\r\n/; + return $buf; +} From f2815569872abcdd78b7c14491888ea79986cd6d Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 10 Sep 2012 19:52:50 -0700 Subject: [PATCH 364/405] JobMaster: use Danga::Socket to schedule In the future, this will allow JobMaster to write concurrently to ProcManager (or even individual workers) without blocking. 
(tweaked to accomodate "!want 0 job_master" support) --- lib/MogileFS/Worker/JobMaster.pm | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/lib/MogileFS/Worker/JobMaster.pm b/lib/MogileFS/Worker/JobMaster.pm index 26bc347c..d70eb395 100644 --- a/lib/MogileFS/Worker/JobMaster.pm +++ b/lib/MogileFS/Worker/JobMaster.pm @@ -46,19 +46,30 @@ sub work { $self->{dele_queue_limit} = 100; $self->{rebl_queue_limit} = 100; - every(1, sub { - # 'pings' parent and populates all queues. - return unless $self->validate_dbh; + Danga::Socket->AddOtherFds($self->psock_fd, sub{ $self->read_from_parent }); + + # kick off the initial run + $self->check_queues; + Danga::Socket->EventLoop; +} + +# 'pings' parent and populates all queues. +sub check_queues { + my $self = shift; + + my $active = 0; + if ($self->validate_dbh) { $self->send_to_parent("queue_depth all"); my $sto = Mgd::get_store(); - $self->read_from_parent(1); - my $active = 0; + $self->parent_ping; $active += $self->_check_replicate_queues($sto); $active += $self->_check_delete_queues($sto); $active += $self->_check_fsck_queues($sto); $active += $self->_check_rebal_queues($sto); - $_[0]->(0) if $active; - }); + } + + # don't sleep if active (just avoid recursion) + Danga::Socket->AddTimer($active ? 0 : 1, sub { $self->check_queues }); } sub _check_delete_queues { From a78bc668ab900135d253a058d4573c13386f37c3 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 11 Sep 2012 12:01:43 -0700 Subject: [PATCH 365/405] httpfile: remove size check failure backoff handling This backoff handling in HTTPFile is redundant for several reasons: * We rely on the monitor worker anyways to inform us of unreachable hosts * Monitor runs much faster nowadays, giving us a smaller window for out-of-date information about host reachability * HTTPFile->size no longer connects to the sidechannel port, only HTTP, so we waste fewer syscalls on failure if we a host went down before the last monitor run. 
--- lib/MogileFS/HTTPFile.pm | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index 1960a015..5a10d706 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -10,9 +10,6 @@ use MogileFS::Util qw(error undeferr wait_for_readability wait_for_writeability) # (caching the connection used for HEAD requests) my $user_agent; -my %size_check_retry_after; # host => $hirestime. -my %size_check_failcount; # host => $count. - my %sidechannel_nexterr; # host => next error log time # create a new MogileFS::HTTPFile instance from a URL. not called @@ -105,15 +102,11 @@ sub size { my ($host, $port, $uri, $path) = map { $self->{$_} } qw(host port uri url); - return undef if (exists $size_check_retry_after{$host} - && $size_check_retry_after{$host} > Time::HiRes::time()); - my $node_timeout = MogileFS->config("node_timeout"); # Hardcoded connection cache size of 20 :( $user_agent ||= LWP::UserAgent->new(timeout => $node_timeout, keep_alive => 20); my $res = $user_agent->head($path); if ($res->is_success) { - delete $size_check_failcount{$host} if exists $size_check_failcount{$host}; my $size = $res->header('content-length'); if (! defined $size && $res->header('server') =~ m/^lighttpd/) { @@ -126,16 +119,8 @@ sub size { return $size; } else { if ($res->code == 404) { - delete $size_check_failcount{$host} if exists $size_check_failcount{$host}; return FILE_MISSING; } - if ($res->message =~ m/connect:/) { - my $count = $size_check_failcount{$host}; - $count ||= 1; - $count *= 2 unless $count > 360; - $size_check_retry_after{$host} = Time::HiRes::time() + $count; - $size_check_failcount{$host} = $count; - } return undeferr("Failed HEAD check for $path (" . $res->code . "): " . 
$res->message); } From 81433c27e0c2aed4f65c2b38f67c46f77607e2c0 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 11 Sep 2012 14:13:29 -0700 Subject: [PATCH 366/405] fsck: parallelize size checks for any given FID This allows us to us to speed up fsck on high latency clusters by issuing parallel HEAD requests. --- lib/MogileFS/DevFID.pm | 18 +++++++++++------ lib/MogileFS/HTTPFile.pm | 40 +++++++++++++++++++++++++++++-------- lib/MogileFS/Worker/Fsck.pm | 19 +++++++++++++----- 3 files changed, 58 insertions(+), 19 deletions(-) diff --git a/lib/MogileFS/DevFID.pm b/lib/MogileFS/DevFID.pm index b004bb07..1b11719f 100644 --- a/lib/MogileFS/DevFID.pm +++ b/lib/MogileFS/DevFID.pm @@ -60,16 +60,22 @@ sub vivify_directories { # returns 0 on missing, # undef on connectivity error, # else size of file on disk (after HTTP HEAD or mogstored stat) +# invokes $cb on the size if $cb is supplied (and Danga::Socket->EventLoop runs) sub size_on_disk { - my $self = shift; + my ($self, $cb) = @_; - return undef unless $self->device->should_read_from; + if ($self->device->should_read_from) { + my $url = $self->get_url; + my $httpfile = $self->{_httpfile_get} ||= MogileFS::HTTPFile->at($url); - my $url = $self->get_url; - my $httpfile = $self->{_httpfile_get} ||= MogileFS::HTTPFile->at($url); + # check that it has size (>0) and is reachable (not undef) + return $httpfile->size($cb); + } else { + # monitor says we cannot read from this device, so do not try - # check that it has size (>0) and is reachable (not undef) - return $httpfile->size; + Danga::Socket->AddTimer(0, sub { $cb->(undef) }) if $cb; + return undef; + } } # returns -1 on missing, diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index 5a10d706..56b983d0 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -94,18 +94,42 @@ sub delete { # returns size of file, (doing a HEAD request and looking at content-length) # returns -1 on file missing (404), # returns undef on connectivity error 
+# +# If an optional callback is supplied, the return value is given to the +# callback. +# +# workers running Danga::Socket->EventLoop must supply a callback +# workers NOT running Danga::Socket->EventLoop msut not supply a callback use constant FILE_MISSING => -1; sub size { - my $self = shift; + my ($self, $cb) = @_; + my %opts = ( port => $self->{port} ); + + if ($cb) { # run asynchronously + if (defined $self->{_size}) { + Danga::Socket->AddTimer(0, sub { $cb->($self->{_size}) }); + } else { + $self->host->http("HEAD", $self->{uri}, \%opts, sub { + $cb->($self->on_size_response(@_)); + }); + } + return undef; + } else { # run synchronously + return $self->{_size} if defined $self->{_size}; - return $self->{_size} if defined $self->{_size}; + my $res; + $self->host->http("HEAD", $self->{uri}, \%opts, sub { ($res) = @_ }); - my ($host, $port, $uri, $path) = map { $self->{$_} } qw(host port uri url); + Danga::Socket->SetPostLoopCallback(sub { !defined $res }); + Danga::Socket->EventLoop; + + return $self->on_size_response($res); + } +} + +sub on_size_response { + my ($self, $res) = @_; - my $node_timeout = MogileFS->config("node_timeout"); - # Hardcoded connection cache size of 20 :( - $user_agent ||= LWP::UserAgent->new(timeout => $node_timeout, keep_alive => 20); - my $res = $user_agent->head($path); if ($res->is_success) { my $size = $res->header('content-length'); if (! defined $size && @@ -121,7 +145,7 @@ sub size { if ($res->code == 404) { return FILE_MISSING; } - return undeferr("Failed HEAD check for $path (" . $res->code . "): " + return undeferr("Failed HEAD check for $self->{url} (" . $res->code . "): " . 
$res->message); } } diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index 1c0b9eba..31436e25 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -211,15 +211,24 @@ sub check_fid { } } +# returns true if all size checks succeeded, false otherwise sub parallel_check_sizes { my ($self, $dflist, $cb) = @_; - # serial, for now: (just prepping for future parallel future, - # getting interface right) + my $expect = scalar @$dflist; + my ($good, $done) = (0, 0); + foreach my $df (@$dflist) { - my $size = $df->size_on_disk; - return 0 unless $cb->($df, $size); + $df->size_on_disk(sub { + my ($size) = @_; + $done++; + $good++ if $cb->($df, $size); + }); } - return 1; + + Danga::Socket->SetPostLoopCallback(sub { $done != $expect }); + Danga::Socket->EventLoop; + + return $good == $expect; } # this is the slow path. if something above in check_fid finds From d5cd4cfb0bd9cb4e6ba200063f40e7c0869835ff Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 11 Sep 2012 15:11:35 -0700 Subject: [PATCH 367/405] httpfile: use Net::HTTP::NB, remove LWP::UserAgent This allows us to use the same HTTP connections between digest and HTTP size checks, reducing the number of open connections we need in the Fsck worker. --- lib/MogileFS/HTTPFile.pm | 25 +++++++++++-------------- lib/MogileFS/Server.pm | 1 - lib/MogileFS/Test.pm | 1 + 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index 56b983d0..82bed6b8 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -7,9 +7,6 @@ use Digest; use MogileFS::Server; use MogileFS::Util qw(error undeferr wait_for_readability wait_for_writeability); -# (caching the connection used for HEAD requests) -my $user_agent; - my %sidechannel_nexterr; # host => next error log time # create a new MogileFS::HTTPFile instance from a URL. 
not called @@ -242,27 +239,27 @@ retry: sub digest_http { my ($self, $alg, $ping_cb) = @_; - # TODO: refactor - my $node_timeout = MogileFS->config("node_timeout"); - # Hardcoded connection cache size of 20 :( - $user_agent ||= LWP::UserAgent->new(timeout => $node_timeout, keep_alive => 20); my $digest = Digest->new($alg); - my %opts = ( + port => $self->{port}, # default (4K) is tiny, use 1M like replicate - ':read_size_hint' => 0x100000, - ':content_cb' => sub { + read_size_hint => 0x100000, + content_cb => sub { $digest->add($_[0]); $ping_cb->(); - } + }, ); - my $path = $self->{url}; - my $res = $user_agent->get($path, %opts); + my $res; + $self->host->http("GET", $self->{uri}, \%opts, sub { ($res) = @_ }); + + # TODO: async interface for workers already running Danga::Socket->EventLoop + Danga::Socket->SetPostLoopCallback(sub { !defined $res }); + Danga::Socket->EventLoop; return $digest->digest if $res->is_success; return FILE_MISSING if $res->code == 404; - return undeferr("Failed $alg (GET) check for $path (" . $res->code . "): " + return undeferr("Failed $alg (GET) check for $self->{url} (" . $res->code . "): " . 
$res->message); } diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 45aedba4..ddb4e3de 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -25,7 +25,6 @@ use File::Path (); use Sys::Syslog (); use Time::HiRes (); use Net::Netmask; -use LWP::UserAgent; use List::Util; use Socket qw(SO_KEEPALIVE IPPROTO_TCP TCP_NODELAY); diff --git a/lib/MogileFS/Test.pm b/lib/MogileFS/Test.pm index 9da487e8..28adf074 100644 --- a/lib/MogileFS/Test.pm +++ b/lib/MogileFS/Test.pm @@ -7,6 +7,7 @@ use DBI; use FindBin qw($Bin); use IO::Socket::INET; use MogileFS::Server; +use LWP::UserAgent; use base 'Exporter'; our @EXPORT = qw(&find_mogclient_or_skip &temp_store &create_mogstored &create_temp_tracker &try_for &want); From d45c8a6cf76808c07732d53e8e0cededa8a7cf62 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 11 Sep 2012 16:01:53 -0700 Subject: [PATCH 368/405] httpfile: use HTTP connection pool for DELETE This simplifies the delete subroutine and should reduce the number of sockets created during rebalance. 
--- lib/MogileFS/HTTPFile.pm | 40 +++++++++++----------------------------- 1 file changed, 11 insertions(+), 29 deletions(-) diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index 82bed6b8..f1734bc7 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -2,7 +2,6 @@ package MogileFS::HTTPFile; use strict; use warnings; use Carp qw(croak); -use Socket qw(PF_INET IPPROTO_TCP SOCK_STREAM); use Digest; use MogileFS::Server; use MogileFS::Util qw(error undeferr wait_for_readability wait_for_writeability); @@ -56,36 +55,19 @@ sub delete { my $self = shift; my %opts = @_; my ($host, $port) = ($self->{host}, $self->{port}); + my %http_opts = ( port => $port ); + my $res; - my $httpsock = IO::Socket::INET->new(PeerAddr => $host, PeerPort => $port, Timeout => 2) - or die "can't connect to $host:$port in 2 seconds"; - - $httpsock->write("DELETE $self->{uri} HTTP/1.0\r\nConnection: keep-alive\r\n\r\n"); - - my $keep_alive = 0; - my $did_del = 0; - - while (defined (my $line = <$httpsock>)) { - $line =~ s/[\s\r\n]+$//; - last unless length $line; - if ($line =~ m!^HTTP/\d+\.\d+\s+(\d+)!) 
{ - my $rescode = $1; - # make sure we get a good response - if ($rescode == 404 && $opts{ignore_missing}) { - $did_del = 1; - next; - } - unless ($rescode == 204) { - die "Bad response from $host:$port: [$line]"; - } - $did_del = 1; - next; - } - die "Unexpected HTTP response line during DELETE from $host:$port: [$line]" unless $did_del; - } - die "Didn't get valid HTTP response during DELETE from $host:port" unless $did_del; + $self->host->http("DELETE", $self->{uri}, \%http_opts, sub { ($res) = @_ }); - return 1; + Danga::Socket->SetPostLoopCallback(sub { !defined $res }); + Danga::Socket->EventLoop; + + if ($res->code == 204 || ($res->code == 404 && $opts{ignore_missing})) { + return 1; + } + my $line = $res->status_line; + die "Bad response on DELETE $self->{url}: [$line]"; } # returns size of file, (doing a HEAD request and looking at content-length) From 13e5fe2a4487acdc5dae8336eabab4d673ceb443 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 11 Sep 2012 17:11:10 -0700 Subject: [PATCH 369/405] delete worker uses persistent HTTP connections This allows us to avoid running ourselves out of local ports when handling massive delete storms. Eventually, we can parallelize deletes in a manner similar to fsck size checking. --- lib/MogileFS/Util.pm | 10 ---- lib/MogileFS/Worker/Delete.pm | 103 +++++++++++----------------------- 2 files changed, 34 insertions(+), 79 deletions(-) diff --git a/lib/MogileFS/Util.pm b/lib/MogileFS/Util.pm index 2a0816d9..b9cec58d 100644 --- a/lib/MogileFS/Util.pm +++ b/lib/MogileFS/Util.pm @@ -252,16 +252,6 @@ sub wait_for_writeability { return $nfound ? 1 : 0; } -# if given an HTTP URL, break it down into [ host, port, URI ], else -# returns die, because we don't support non-http-mode anymore -sub url_parts { - my $path = shift; - if ($path =~ m!^http://(.+?)(?::(\d+))?(/.+)$!) 
{ - return [ $1, $2 || 80, $3 ]; - } - Carp::croak("Bogus URL: $path"); -} - sub max { my ($n1, $n2) = @_; return $n1 if $n1 > $n2; diff --git a/lib/MogileFS/Worker/Delete.pm b/lib/MogileFS/Worker/Delete.pm index a811e6e6..ff4a421f 100644 --- a/lib/MogileFS/Worker/Delete.pm +++ b/lib/MogileFS/Worker/Delete.pm @@ -11,8 +11,6 @@ use MogileFS::Server; use constant LIMIT => 1000; use constant PER_BATCH => 100; -# TODO: use LWP and persistent connections to do deletes. less local ports used. - sub new { my ($class, $psock) = @_; my $self = fields::new($class); @@ -75,6 +73,29 @@ sub work { } +# deletes a given DevFID from the storage device +# returns true on success, false on failure +sub delete_devfid { + my ($self, $dfid) = @_; + + # send delete request + error("Sending delete for " . $dfid->url) if $Mgd::DEBUG >= 2; + + my $res; + $dfid->device->host->http("DELETE", $dfid->uri_path, undef, sub { ($res) = @_ }); + Danga::Socket->SetPostLoopCallback(sub { !defined $res }); + Danga::Socket->EventLoop; + + my $httpcode = $res->code; + + # effectively means all went well + return 1 if (($httpcode >= 200 && $httpcode <= 299) || $httpcode == 404); + + my $status = $res->status_line; + error("Error: unlink failure: " . $dfid->url . ": HTTP code $status"); + return 0; +} + sub process_tempfiles { my $self = shift; # also clean the tempfile table @@ -212,46 +233,16 @@ sub process_deletes2 { next; } - my $urlparts = MogileFS::Util::url_parts($path); - - # hit up the server and delete it - # TODO: (optimization) use MogileFS->get_observed_state and don't - # try to delete things known to be down/etc - my $sock = IO::Socket::INET->new(PeerAddr => $urlparts->[0], - PeerPort => $urlparts->[1], - Timeout => 2); - # this used to mark the device as down for the whole tracker. - # if the device is actually down, we can struggle until the - # monitor job figures it out... otherwise an occasional timeout - # due to high load will prevent delete from working at all. 
- unless ($sock) { + if ($self->delete_devfid($dfid)) { + # effectively means all went well + $sto->remove_fidid_from_devid($fidid, $devid); + delete $devids{$devid}; + } else { + # remote file system error? connect failure? retry in 30min $sto->reschedule_file_to_delete2_relative($fidid, - 60 * 60 * (1 + $todo->{failcount})); + 60 * 30 * (1 + $todo->{failcount})); next; } - - # send delete request - error("Sending delete for $path") if $Mgd::DEBUG >= 2; - - $sock->write("DELETE $urlparts->[2] HTTP/1.0\r\n\r\n"); - my $response = <$sock>; - if ($response =~ m!^HTTP/\d+\.\d+\s+(\d+)!) { - if (($1 >= 200 && $1 <= 299) || $1 == 404) { - # effectively means all went well - $sto->remove_fidid_from_devid($fidid, $devid); - delete $devids{$devid}; - } else { - # remote file system error? mark node as down - my $httpcode = $1; - error("Error: unlink failure: $path: HTTP code $httpcode"); - - $sto->reschedule_file_to_delete2_relative($fidid, - 60 * 30 * (1 + $todo->{failcount})); - next; - } - } else { - error("Error: unknown response line deleting $path: $response"); - } } # fid has no pants. @@ -357,37 +348,11 @@ sub process_deletes { next; } - my $urlparts = MogileFS::Util::url_parts($path); - - # hit up the server and delete it - # TODO: (optimization) use MogileFS->get_observed_state and don't try to delete things known to be down/etc - my $sock = IO::Socket::INET->new(PeerAddr => $urlparts->[0], - PeerPort => $urlparts->[1], - Timeout => 2); - unless ($sock) { - # timeout or something, mark this device as down for now and move on - $reschedule_fid->(60 * 60 * 2, "no_sock_to_hostid"); - next; - } - - # send delete request - error("Sending delete for $path") if $Mgd::DEBUG >= 2; - - $sock->write("DELETE $urlparts->[2] HTTP/1.0\r\n\r\n"); - my $response = <$sock>; - if ($response =~ m!^HTTP/\d+\.\d+\s+(\d+)!) { - if (($1 >= 200 && $1 <= 299) || $1 == 404) { - # effectively means all went well - $done_with_devid->("deleted"); - } else { - # remote file system error? 
mark node as down - my $httpcode = $1; - error("Error: unlink failure: $path: HTTP code $httpcode"); - $reschedule_fid->(60 * 30, "http_code_$httpcode"); - next; - } + if ($self->delete_devfid($dfid)) { + $done_with_devid->("deleted"); } else { - error("Error: unknown response line deleting $path: $response"); + # remote file system error? connect failure? retry in 30min + $reschedule_fid->(60 * 30, "http_failure"); } } From bdeaaf9d5aa35ed3c41ab1d3c402282d9cec6a5c Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 19 Sep 2012 17:25:10 -0700 Subject: [PATCH 370/405] device: reuse HTTP connections for MKCOL This can reduce latency for folks still stuck with MKCOL. This creates no new sockets for replicate and monitor in all cases, as connections to the HTTP DAV server are already used in those workers. This only adds new persistent connections to the queryworker if GET-only HTTP ports are configured (queryworker already may call HTTPFile->size). --- lib/MogileFS/Device.pm | 80 +++++++++++++++++++--------------- lib/MogileFS/Worker/Monitor.pm | 6 +-- 2 files changed, 47 insertions(+), 39 deletions(-) diff --git a/lib/MogileFS/Device.pm b/lib/MogileFS/Device.pm index 4a253227..3374aa91 100644 --- a/lib/MogileFS/Device.pm +++ b/lib/MogileFS/Device.pm @@ -182,52 +182,60 @@ sub doesnt_know_mkcol { # Gross class-based singleton cache. my %dir_made; # /dev/path -> $time my $dir_made_lastclean = 0; -# returns 1 on success, 0 on failure + +# if no callback is supplied: returns 1 on success, 0 on failure +# if a callback is supplied, the return value will be passed to the callback +# upon completion. sub create_directory { - my ($self, $uri) = @_; - return 1 if $self->doesnt_know_mkcol || MogileFS::Config->server_setting_cached('skip_mkcol'); + my ($self, $uri, $cb) = @_; + if ($self->doesnt_know_mkcol || MogileFS::Config->server_setting_cached('skip_mkcol')) { + return $cb ? $cb->(1) : 1; + } # rfc2518 says we "should" use a trailing slash. 
Some servers # (nginx) appears to require it. $uri .= '/' unless $uri =~ m/\/$/; - return 1 if $dir_made{$uri}; - - my $hostid = $self->hostid; - my $host = $self->host; - my $hostip = $host->ip or return 0; - my $port = $host->http_port or return 0; - my $peer = "$hostip:$port"; - - my $sock = IO::Socket::INET->new(PeerAddr => $peer, Timeout => 1) - or return 0; - - print $sock "MKCOL $uri HTTP/1.0\r\n". - "Content-Length: 0\r\n\r\n"; - - my $ans = <$sock>; - - # if they don't support this method, remember that - if ($ans && $ans =~ m!HTTP/1\.[01] (400|501)!) { - $self->{no_mkcol} = 1; - # TODO: move this into method in *monitor* worker - return 1; + if ($dir_made{$uri}) { + return $cb ? $cb->(1) : 1; } - return 0 unless $ans && $ans =~ m!^HTTP/1.[01] 2\d\d!; + my $res; + my $on_mkcol_response = sub { + if ($res->is_success) { + my $now = time(); + $dir_made{$uri} = $now; + + # cleanup %dir_made occasionally. + my $clean_interval = 300; # every 5 minutes. + if ($dir_made_lastclean < $now - $clean_interval) { + $dir_made_lastclean = $now; + foreach my $k (keys %dir_made) { + delete $dir_made{$k} if $dir_made{$k} < $now - 3600; + } + } + return 1; + } elsif ($res->code =~ /\A(?:400|501)\z/) { + # if they don't support this method, remember that + # TODO: move this into method in *monitor* worker + $self->{no_mkcol} = 1; + return 1; + } else { + return 0; + } + }; - my $now = time(); - $dir_made{$uri} = $now; + my %opts = ( headers => { "Content-Length" => "0" } ); + $self->host->http("MKCOL", $uri, \%opts, sub { + ($res) = @_; + $cb->($on_mkcol_response->()) if $cb; + }); - # cleanup %dir_made occasionally. - my $clean_interval = 300; # every 5 minutes. 
- if ($dir_made_lastclean < $now - $clean_interval) { - $dir_made_lastclean = $now; - foreach my $k (keys %dir_made) { - delete $dir_made{$k} if $dir_made{$k} < $now - 3600; - } - } - return 1; + return if $cb; + + Danga::Socket->SetPostLoopCallback(sub { !defined $res }); + Danga::Socket->EventLoop; + return $on_mkcol_response->(); } sub fid_list { diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index 4653d886..3062cdbd 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -495,9 +495,9 @@ sub check_device { # first, create the test-write directory. this will return # immediately after the first time, as the 'create_directory' # function caches what it's already created. - $dev->create_directory("/dev$devid/test-write"); # XXX synchronous - - return $self->check_write($dev); + $dev->create_directory("/dev$devid/test-write", sub { + $self->check_write($dev); + }); }); } From 7e7e53075cffe4726e7a3c400c90b1616ae2e21c Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 19 Sep 2012 19:06:22 -0700 Subject: [PATCH 371/405] create_open: parallelize directory vivification For setups stuck needing MKCOL, we can parallelize directory vivification for multi-destination uploads. 
--- lib/MogileFS/DevFID.pm | 4 ++-- lib/MogileFS/Device.pm | 16 ++++++++++++---- lib/MogileFS/Worker/Query.pm | 19 +++++++++++++++++-- 3 files changed, 31 insertions(+), 8 deletions(-) diff --git a/lib/MogileFS/DevFID.pm b/lib/MogileFS/DevFID.pm index 1b11719f..52bffc19 100644 --- a/lib/MogileFS/DevFID.pm +++ b/lib/MogileFS/DevFID.pm @@ -52,9 +52,9 @@ sub get_url { } sub vivify_directories { - my $self = shift; + my ($self, $cb) = @_; my $url = $self->url; - $self->device()->vivify_directories($url); + $self->device()->vivify_directories($url, $cb); } # returns 0 on missing, diff --git a/lib/MogileFS/Device.pm b/lib/MogileFS/Device.pm index 3374aa91..2ea1cd9b 100644 --- a/lib/MogileFS/Device.pm +++ b/lib/MogileFS/Device.pm @@ -286,7 +286,7 @@ sub can_change_to_state { } sub vivify_directories { - my ($self, $path) = @_; + my ($self, $path, $cb) = @_; # $path is something like: # http://10.0.0.26:7500/dev2/0/000/148/0000148056.fid @@ -302,9 +302,17 @@ sub vivify_directories { die "devid mismatch" unless $self->id == $devid; - $self->create_directory("/dev$devid/$p1"); - $self->create_directory("/dev$devid/$p1/$p2"); - $self->create_directory("/dev$devid/$p1/$p2/$p3"); + if ($cb) { + $self->create_directory("/dev$devid/$p1", sub { + $self->create_directory("/dev$devid/$p1/$p2", sub { + $self->create_directory("/dev$devid/$p1/$p2/$p3", $cb); + }); + }); + } else { + $self->create_directory("/dev$devid/$p1"); + $self->create_directory("/dev$devid/$p1/$p2"); + $self->create_directory("/dev$devid/$p1/$p2/$p3"); + } } # Compatibility interface since this old routine is unfortunately called diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 185420c2..bde7f987 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -294,12 +294,22 @@ sub cmd_create_open { } # make sure directories exist for client to be able to PUT into + my %dir_done; + $profstart->("vivify_dir_on_all_devs"); + + my $t0 = Time::HiRes::time(); foreach my 
$dev (@dests) { - $profstart->("vivify_dir_on_dev" . $dev->id); my $dfid = MogileFS::DevFID->new($dev, $fidid); - $dfid->vivify_directories; + $dfid->vivify_directories(sub { + $dir_done{$dfid->devid} = Time::HiRes::time() - $t0; + }); } + # don't start the event loop if results are all cached + if (scalar keys %dir_done != scalar @dests) { + Danga::Socket->SetPostLoopCallback(sub { scalar keys %dir_done != scalar @dests }); + Danga::Socket->EventLoop; + } $profstart->("end"); # common reply variables @@ -317,6 +327,11 @@ sub cmd_create_open { sprintf("%0.03f", $profpoints[$i+1]->[1] - $profpoints[$i]->[1]); } + while (my ($devid, $time) = each %dir_done) { + my $ptnum = ++$res->{profpoints}; + $res->{"prof_${ptnum}_name"} = "vivify_dir_on_dev$devid"; + $res->{"prof_${ptnum}_time"} = sprintf("%0.03f", $time); + } } # add path info From b8a0674fe997c2db75ea5cd707cc638f106421fd Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 18 Sep 2012 18:00:39 -0700 Subject: [PATCH 372/405] replicate: enforce expected Content-Length in http_copy There's no reason we should ever skip Content-Length validation if we know which FID we're replicating and have an FID object handy. 
Conflicts: lib/MogileFS/Worker/Replicate.pm --- lib/MogileFS/Worker/Replicate.pm | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index 5806a6b6..bd5c4060 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -477,9 +477,7 @@ sub replicate { my $rv = http_copy( sdevid => $sdevid, ddevid => $ddevid, - fid => $fidid, - rfid => $fid, - expected_len => undef, # FIXME: get this info to pass along + fid => $fid, errref => \$copy_err, callback => sub { $worker->still_alive; }, digest => $digest, @@ -529,20 +527,21 @@ sub replicate { # copies a file from one Perlbal to another utilizing HTTP sub http_copy { my %opts = @_; - my ($sdevid, $ddevid, $fid, $rfid, $expected_clen, $intercopy_cb, $errref, $digest) = + my ($sdevid, $ddevid, $fid, $intercopy_cb, $errref, $digest) = map { delete $opts{$_} } qw(sdevid ddevid fid - rfid - expected_len callback errref digest ); die if %opts; + $fid = MogileFS::FID->new($fid) unless ref($fid); + my $fidid = $fid->id; + my $expected_clen = $fid->length; my $content_md5 = ''; - my $fid_checksum = $rfid->checksum; + my $fid_checksum = $fid->checksum; if ($fid_checksum && $fid_checksum->hashname eq "MD5") { # some HTTP servers may be able to verify Content-MD5 on PUT # and reject corrupted requests. 
no HTTP server should reject @@ -556,7 +555,7 @@ sub http_copy { # handles setting unreachable magic; $error->(reachability, "message") my $error_unreachable = sub { $$errref = "src_error" if $errref; - return error("Fid $fid unreachable while replicating: $_[0]"); + return error("Fid $fidid unreachable while replicating: $_[0]"); }; my $dest_error = sub { @@ -575,7 +574,7 @@ sub http_copy { my $sdev = Mgd::device_factory()->get_by_id($sdevid); my $ddev = Mgd::device_factory()->get_by_id($ddevid); - return error("Error: unable to get device information: source=$sdevid, destination=$ddevid, fid=$fid") + return error("Error: unable to get device information: source=$sdevid, destination=$ddevid, fid=$fidid") unless $sdev && $ddev; my $s_dfid = MogileFS::DevFID->new($sdev, $fid); @@ -591,7 +590,7 @@ sub http_copy { my ($dhostip, $dport) = ($dhost->ip, $dhost->http_port); unless (defined $spath && defined $dpath && defined $shostip && defined $dhostip && $sport && $dport) { # show detailed information to find out what's not configured right - error("Error: unable to replicate file fid=$fid from device id $sdevid to device id $ddevid"); + error("Error: unable to replicate file fid=$fidid from device id $sdevid to device id $ddevid"); error(" http://$shostip:$sport$spath -> http://$dhostip:$dport$dpath"); return 0; } @@ -607,7 +606,7 @@ sub http_copy { # for specific files. 
my $shttphost; MogileFS::run_global_hook('replicate_alternate_source', - $rfid, \$shostip, \$sport, \$spath, \$shttphost); + $fid, \$shostip, \$sport, \$spath, \$shttphost); # okay, now get the file my $sock = IO::Socket::INET->new(PeerAddr => $shostip, PeerPort => $sport, Timeout => 2) @@ -636,7 +635,7 @@ sub http_copy { $clen = $1; } return $error_unreachable->("File $spath has unexpected content-length of $clen, not $expected_clen") - if defined $expected_clen && $clen != $expected_clen; + if $clen != $expected_clen; # open target for put my $dsock = IO::Socket::INET->new(PeerAddr => $dhostip, PeerPort => $dport, Timeout => 2) @@ -702,7 +701,7 @@ sub http_copy { if ($line =~ m!^HTTP/\d+\.\d+\s+(\d+)!) { if ($1 >= 200 && $1 <= 299) { if ($digest) { - my $alg = ($fid_checksum && $fid_checksum->hashname) || $rfid->class->hashname; + my $alg = ($fid_checksum && $fid_checksum->hashname) || $fid->class->hashname; if ($ddev->{reject_bad_md5} && ($alg eq "MD5")) { # dest device would've rejected us with a error, From 905440513860df892bbd0631a9bc88711581a956 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 4 Feb 2013 20:58:02 +0000 Subject: [PATCH 373/405] replicate: use persistent connection from pool if possible This should reduce the amount of TIME-WAIT sockets and TCP handshakes when replicating, especially with small files. An attempt was previously made to use the Net::HTTP::NB API directly, but that resulted in complicated callback nesting and state management needed to throttle the reader if the sender socket were blocked in any way. There were many bugs in the early version of this code as a result of the complicated code. Even after all the bugs got fixed, a small performance reduction due to the extra buffer copies was difficult to avoid. Thus I started using the synchronous version to keep the code simple and fast while preserving the ability to use persistent sockets to avoid excessive TIME-WAIT and handshaking for small file replication. 
--- lib/MogileFS/Worker/Replicate.pm | 263 ++++++++++++++++++++----------- 1 file changed, 173 insertions(+), 90 deletions(-) diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index bd5c4060..f539710e 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -524,6 +524,37 @@ sub replicate { "replication policy ran out of suggestions for us replicating fid $fidid"); } +# Returns a hashref with the following: +# { +# code => HTTP status code integer, +# keep => boolean, whether to keep the connection after reading +# len => value of the Content-Length header (integer) +# } +sub read_headers { + my ($sock) = @_; + my %rv = (); + # FIXME: this can block. needs to timeout. + my $line = <$sock>; + return unless defined $line; + $line =~ m!\AHTTP/(\d+\.\d+)\s+(\d+)! or return; + $rv{keep} = $1 >= 1.1; + $rv{code} = $2; + + while (1) { + $line = <$sock>; + return unless defined $line; + last if $line =~ /\A\r?\n\z/; + if ($line =~ /\AConnection:\s*keep-alive\s*\z/is) { + $rv{keep} = 1; + } elsif ($line =~ /\AConnection:\s*close\s*\z/is) { + $rv{keep} = 0; + } elsif ($line =~ /\AContent-Length:\s*(\d+)\s*\z/is) { + $rv{len} = $1; + } + } + return \%rv; +} + # copies a file from one Perlbal to another utilizing HTTP sub http_copy { my %opts = @_; @@ -540,7 +571,9 @@ sub http_copy { $fid = MogileFS::FID->new($fid) unless ref($fid); my $fidid = $fid->id; my $expected_clen = $fid->length; + my $clen; my $content_md5 = ''; + my ($sconn, $dconn); my $fid_checksum = $fid->checksum; if ($fid_checksum && $fid_checksum->hashname eq "MD5") { # some HTTP servers may be able to verify Content-MD5 on PUT @@ -552,22 +585,25 @@ sub http_copy { $intercopy_cb ||= sub {}; + my $err_common = sub { + my ($err, $msg) = @_; + $$errref = $err if $errref; + $sconn->close($err) if $sconn; + $dconn->close($err) if $dconn; + return error($msg); + }; + # handles setting unreachable magic; $error->(reachability, "message") my 
$error_unreachable = sub { - $$errref = "src_error" if $errref; - return error("Fid $fidid unreachable while replicating: $_[0]"); + return $err_common->("src_error", "Fid $fidid unreachable while replicating: $_[0]"); }; my $dest_error = sub { - $$errref = "dest_error" if $errref; - error($_[0]); - return 0; + return $err_common->("dest_error", $_[0]); }; my $src_error = sub { - $$errref = "src_error" if $errref; - error($_[0]); - return 0; + return $err_common->("src_error", $_[0]); }; # get some information we'll need @@ -595,95 +631,119 @@ sub http_copy { return 0; } + my $put = "PUT $dpath HTTP/1.0\r\nConnection: keep-alive\r\n" . + "Content-length: $expected_clen$content_md5\r\n\r\n"; + # need by webdav servers, like lighttpd... $ddev->vivify_directories($d_dfid->url); - # setup our pipe error handler, in case we get closed on - my $pipe_closed = 0; - local $SIG{PIPE} = sub { $pipe_closed = 1; }; - # call a hook for odd casing completely different source data # for specific files. my $shttphost; MogileFS::run_global_hook('replicate_alternate_source', $fid, \$shostip, \$sport, \$spath, \$shttphost); + my $durl = "http://$dhostip:$dport$dpath"; + my $surl = "http://$shostip:$sport$spath"; # okay, now get the file - my $sock = IO::Socket::INET->new(PeerAddr => $shostip, PeerPort => $sport, Timeout => 2) + my %sopts = ( ip => $shostip, port => $sport ); + + my $get = "GET $spath HTTP/1.0\r\nConnection: keep-alive\r\n"; + # plugin set a custom host. 
+ $get .= "Host: $shttphost\r\n" if $shttphost; + + my $data = ''; + my ($sock, $dsock); + my ($wcount, $bytes_to_read, $written, $remain); + my ($stries, $dtries) = (0, 0); + +retry: + $sconn->close("retrying") if $sconn; + $dconn->close("retrying") if $dconn; + $dconn = undef; + $stries++; + $sconn = $shost->http_conn_get(\%sopts) or return $src_error->("Unable to create source socket to $shostip:$sport for $spath"); - unless ($shttphost) { - $sock->write("GET $spath HTTP/1.0\r\n\r\n"); - } else { - # plugin set a custom host. - $sock->write("GET $spath HTTP/1.0\r\nHost: $shttphost\r\n\r\n"); + $sock = $sconn->sock; + unless ($sock->write("$get\r\n")) { + goto retry if $sconn->retryable && $stries == 1; + return $src_error->("Pipe closed retrieving $spath from $shostip:$sport"); } - return error("Pipe closed retrieving $spath from $shostip:$sport") - if $pipe_closed; # we just want a content length - my $clen; - # FIXME: this can block. needs to timeout. - while (defined (my $line = <$sock>)) { - $line =~ s/[\s\r\n]+$//; - last unless length $line; - if ($line =~ m!^HTTP/\d+\.\d+\s+(\d+)!) 
{ - # make sure we get a good response - return $error_unreachable->("Error: Resource http://$shostip:$sport$spath failed: HTTP $1") - unless $1 >= 200 && $1 <= 299; - } - next unless $line =~ /^Content-length:\s*(\d+)\s*$/i; - $clen = $1; + my $sres = read_headers($sock); + unless ($sres) { + goto retry if $sconn->retryable && $stries == 1; + return $error_unreachable->("Error: Resource $surl failed to return an HTTP response"); + } + unless ($sres->{code} >= 200 && $sres->{code} <= 299) { + return $error_unreachable->("Error: Resource $surl failed: HTTP $sres->{code}"); } + $clen = $sres->{len}; + return $error_unreachable->("File $spath has unexpected content-length of $clen, not $expected_clen") if $clen != $expected_clen; # open target for put - my $dsock = IO::Socket::INET->new(PeerAddr => $dhostip, PeerPort => $dport, Timeout => 2) + $dtries++; + $dconn = $dhost->http_conn_get or return $dest_error->("Unable to create dest socket to $dhostip:$dport for $dpath"); - $dsock->write("PUT $dpath HTTP/1.0\r\nContent-length: $clen$content_md5\r\n\r\n") - or return $dest_error->("Unable to write data to $dpath on $dhostip:$dport"); - return $dest_error->("Pipe closed during write to $dpath on $dhostip:$dport") - if $pipe_closed; + $dsock = $dconn->sock; + + unless ($dsock->write($put)) { + goto retry if $dconn->retryable && $dtries == 1; + return $dest_error->("Pipe closed during write to $dpath on $dhostip:$dport"); + } # now read data and print while we're reading. 
- my ($data, $written, $remain) = ('', 0, $clen); - my $bytes_to_read = 1024*1024; # read 1MB at a time until there's less than that remaining + ($written, $remain) = (0, $clen); + $bytes_to_read = 1024*1024; # read 1MB at a time until there's less than that remaining $bytes_to_read = $remain if $remain < $bytes_to_read; - my $finished_read = 0; - - if ($bytes_to_read) { - while (!$pipe_closed && (my $bytes = $sock->read($data, $bytes_to_read))) { - # now we've read in $bytes bytes - $remain -= $bytes; - $bytes_to_read = $remain if $remain < $bytes_to_read; - $digest->add($data) if $digest; - - my $data_len = $bytes; - my $data_off = 0; - while (1) { - my $wbytes = syswrite($dsock, $data, $data_len, $data_off); - unless (defined $wbytes) { - return $dest_error->("Error: syswrite failed after $written bytes with: $!; failed putting to $dpath"); - } - $written += $wbytes; - $intercopy_cb->(); - last if ($data_len == $wbytes); + $wcount = 0; + + while ($bytes_to_read) { + my $bytes = $sock->read($data, $bytes_to_read); + unless (defined $bytes) { + return $src_error->("error reading midway through source: $!"); + } + if ($bytes == 0) { + return $src_error->("EOF reading midway through source: $!"); + } - $data_len -= $wbytes; - $data_off += $wbytes; + # now we've read in $bytes bytes + $remain -= $bytes; + $bytes_to_read = $remain if $remain < $bytes_to_read; + $digest->add($data) if $digest; + + my $data_len = $bytes; + my $data_off = 0; + while (1) { + my $wbytes = syswrite($dsock, $data, $data_len, $data_off); + unless (defined $wbytes) { + # it can take two writes to determine if a socket is dead + # (TCP_NODELAY and TCP_CORK are (and must be) zero here) + goto retry if (!$wcount && $dconn->retryable && $dtries == 1); + return $dest_error->("Error: syswrite failed after $written bytes with: $!; failed putting to $dpath"); } + $wcount++; + $written += $wbytes; + $intercopy_cb->(); + last if ($data_len == $wbytes); - die if $bytes_to_read < 0; - next if 
$bytes_to_read; - $finished_read = 1; - last; + $data_len -= $wbytes; + $data_off += $wbytes; } + + die if $bytes_to_read < 0; + } + + # source connection drained, return to pool + if ($sres->{keep}) { + $shost->http_conn_put($sconn); + $sconn = undef; } else { - # 0 byte file copy. - $finished_read = 1; + $sconn->close("http_close"); } - return $dest_error->("closed pipe writing to destination") if $pipe_closed; - return $src_error->("error reading midway through source: $!") unless $finished_read; # callee will want this digest, too, so clone as "digest" is destructive $digest = $digest->clone->digest if $digest; @@ -697,32 +757,55 @@ sub http_copy { } # now read in the response line (should be first line) - my $line = <$dsock>; - if ($line =~ m!^HTTP/\d+\.\d+\s+(\d+)!) { - if ($1 >= 200 && $1 <= 299) { - if ($digest) { - my $alg = ($fid_checksum && $fid_checksum->hashname) || $fid->class->hashname; - - if ($ddev->{reject_bad_md5} && ($alg eq "MD5")) { - # dest device would've rejected us with a error, - # no need to reread the file - return 1; - } - my $durl = "http://$dhostip:$dport$dpath"; - my $httpfile = MogileFS::HTTPFile->at($durl); - my $actual = $httpfile->digest($alg, $intercopy_cb); - if ($actual ne $digest) { - my $expect = unpack("H*", $digest); - $actual = unpack("H*", $actual); - return $dest_error->("checksum mismatch on PUT, expected: $expect actual: $digest"); - } + my $dres = read_headers($dsock); + unless ($dres) { + goto retry if (!$wcount && $dconn->retryable && $dtries == 1); + return $dest_error->("Error: HTTP response line not recognized writing to $durl"); + } + + # drain the response body if there is one + # there may be no dres->{len}/Content-Length if there is no body + if ($dres->{len}) { + my $r = $dsock->read($data, $dres->{len}); # dres->{len} should be tiny + if (defined $r) { + if ($r != $dres->{len}) { + Mgd::error("Failed to read $r of Content-Length:$dres->{len} bytes for PUT response on $durl"); + $dres->{keep} = 0; } - 
return 1; + } else { + Mgd::error("Failed to read Content-Length:$dres->{len} bytes for PUT response on $durl ($!)"); + $dres->{keep} = 0; } - return $dest_error->("Got HTTP status code $1 PUTing to http://$dhostip:$dport$dpath"); + } + + # return the connection back to the connection pool + if ($dres->{keep}) { + $dhost->http_conn_put($dconn); + $dconn = undef; } else { - return $dest_error->("Error: HTTP response line not recognized writing to http://$dhostip:$dport$dpath: $line"); + $dconn->close("http_close"); + } + + if ($dres->{code} >= 200 && $dres->{code} <= 299) { + if ($digest) { + my $alg = ($fid_checksum && $fid_checksum->hashname) || $fid->class->hashname; + + if ($ddev->{reject_bad_md5} && ($alg eq "MD5")) { + # dest device would've rejected us with a error, + # no need to reread the file + return 1; + } + my $httpfile = MogileFS::HTTPFile->at($durl); + my $actual = $httpfile->digest($alg, $intercopy_cb); + if ($actual ne $digest) { + my $expect = unpack("H*", $digest); + $actual = unpack("H*", $actual); + return $dest_error->("checksum mismatch on PUT, expected: $expect actual: $digest"); + } + } + return 1; } + return $dest_error->("Got HTTP status code $dres->{code} PUTing to $durl"); } 1; From c562af231d9a0c962d055bb1690018d1810e2c6d Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 21 Feb 2013 00:28:27 +0000 Subject: [PATCH 374/405] host: handle case where conn_get may return undef MogileFS::ConnectionPool::conn_get may return undef on some errors, so we must account for that and not kill the replicate worker. 
--- lib/MogileFS/Host.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Host.pm b/lib/MogileFS/Host.pm index 11e75453..2a119cf2 100644 --- a/lib/MogileFS/Host.pm +++ b/lib/MogileFS/Host.pm @@ -123,7 +123,7 @@ sub http_conn_get { _init_pools(); my $conn = $http_pool->conn_get($ip, $port); - $conn->sock->blocking(1); + $conn->sock->blocking(1) if $conn; return $conn; } From 72758a544a593615dfc9bd4c79b98e107805cac5 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 21 Feb 2013 04:19:13 +0000 Subject: [PATCH 375/405] ConnectionPool: improve reporting of socket creation errors Send the entire error message (including intended host:port so it is more informative when it propagates to Connection::HTTP::err_response. We also do not need to log the error in ConnectionPool, as the error will be logged by the caller. While we're at it, fix the documentation and a spelling error in err_response, too. --- lib/MogileFS/Connection/HTTP.pm | 2 +- lib/MogileFS/ConnectionPool.pm | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/lib/MogileFS/Connection/HTTP.pm b/lib/MogileFS/Connection/HTTP.pm index 1c8e87e2..84a210ca 100644 --- a/lib/MogileFS/Connection/HTTP.pm +++ b/lib/MogileFS/Connection/HTTP.pm @@ -211,7 +211,7 @@ sub err_response { my ($self, $err, $http_res_cb) = @_; my $res = HTTP::Response->new(500, $err); - $err ||= "(unspecifed error)"; + $err ||= "(unspecified error)"; my $req = $self->{http_req} || "no HTTP request made"; Mgd::error("$err: $req"); $res->header("X-MFS-Error", $err); diff --git a/lib/MogileFS/ConnectionPool.pm b/lib/MogileFS/ConnectionPool.pm index 2b9a59d9..88d0eca1 100644 --- a/lib/MogileFS/ConnectionPool.pm +++ b/lib/MogileFS/ConnectionPool.pm @@ -114,7 +114,7 @@ sub _conn_new_maybe { } # creates new connection and registers it in our fdmap -# returns undef if resources (FDs, buffers) aren't available +# returns error string if resources (FDs, buffers) aren't available sub _conn_new { my ($self, 
$ip, $port) = @_; @@ -130,10 +130,7 @@ sub _conn_new { # EMFILE/ENFILE should never happen as the capacity for this # pool is far under the system defaults. Just give up on # EMFILE/ENFILE like any other error. - my $mfs_err = $!; - Mgd::log('err', "failed to create socket to $ip:$port ($mfs_err)"); - - return $mfs_err; + return "failed to create socket to $ip:$port ($!)"; } } From 66c8827c22f05b67ec4a8812edbdfe69ff6e5a68 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 22 Feb 2013 03:32:51 +0000 Subject: [PATCH 376/405] t/http.t: test error handling on non-running server We need to ensure we don't blow up a worker process if a server is shutdown and a connection attempted before the monitor notices. --- t/http.t | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/t/http.t b/t/http.t index 04ee993a..22c40443 100644 --- a/t/http.t +++ b/t/http.t @@ -289,6 +289,26 @@ sim_node_timeout(1); has_nothing_queued(); } +# server is not running +{ + my $resp; + + # we want an empty pool to avoid retries + my $pool = $idle_pool->{"$host->{hostip}:$host->{http_port}"}; + is(0, scalar @$pool, "connection pool is empty"); + + Danga::Socket->SetPostLoopCallback(sub { ! defined($resp) }); + $http->close; # $http is unusable after this + $host->http("GET", "/fail", undef, sub { $resp = $_[0] }); + Danga::Socket->EventLoop; + ok(! $resp->is_success, "HTTP response is not successful"); + ok($resp->header("X-MFS-Error"), "X-MFS-Error is set"); + is(0, scalar @$pool, "connection pool is empty"); + + has_nothing_inflight(); + has_nothing_queued(); +} + done_testing(); sub has_nothing_inflight { From 9b2b87a9507698c3b4757953f99f2a47501638fe Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 22 Feb 2013 03:41:13 +0000 Subject: [PATCH 377/405] connection/{poolable,http}: common retry logic for timeouts We will want similar logic for Mogstored sidechannel to avoid retrying on timeout. 
--- lib/MogileFS/Connection/HTTP.pm | 2 +- lib/MogileFS/Connection/Poolable.pm | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Connection/HTTP.pm b/lib/MogileFS/Connection/HTTP.pm index 84a210ca..3ac75412 100644 --- a/lib/MogileFS/Connection/HTTP.pm +++ b/lib/MogileFS/Connection/HTTP.pm @@ -191,7 +191,7 @@ sub err { my $http_res_cb = delete $self->{http_res_cb}; # don't retry if we already got a response header nor if we got a timeout - if ($reason !~ /timeout/ && $self->retryable && $http_res_cb && !$self->{http_response}) { + if ($self->retryable($reason) && $http_res_cb && !$self->{http_response}) { # do not call inflight_expire here, since we need inflight_cb # for retrying diff --git a/lib/MogileFS/Connection/Poolable.pm b/lib/MogileFS/Connection/Poolable.pm index 29e7f01d..7106e9e5 100644 --- a/lib/MogileFS/Connection/Poolable.pm +++ b/lib/MogileFS/Connection/Poolable.pm @@ -57,7 +57,10 @@ sub mark_idle { # has ever been marked idle. The connection pool can never be 100% # reliable for detecting dead sockets, and all HTTP requests made by # MogileFS are idempotent. -sub retryable { $_[0]->{mfs_requests} > 0 } +sub retryable { + my ($self, $reason) = @_; + return ($reason !~ /timeout/ && $self->{mfs_requests} > 0); +} # Sets (or updates) the timeout of the connection # timeout_key is "node_timeout" or "conn_timeout" From fa80d20142f6345ecf96488249d2651c0034b234 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 23 Feb 2013 02:28:29 +0000 Subject: [PATCH 378/405] connection/poolable: stricter timeout key check String representations of small floating point values may be in (scientific) E notation, so we must ensure the entire string is free of decimal digits before considering it a configuration key. 
--- lib/MogileFS/Connection/Poolable.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MogileFS/Connection/Poolable.pm b/lib/MogileFS/Connection/Poolable.pm index 7106e9e5..16704ff5 100644 --- a/lib/MogileFS/Connection/Poolable.pm +++ b/lib/MogileFS/Connection/Poolable.pm @@ -72,7 +72,7 @@ sub set_timeout { if ($timeout_key) { my $timeout; - if ($timeout_key =~ /[a-z_]/) { + if ($timeout_key =~ /\A[a-z_]+\z/) { $timeout = MogileFS->config($timeout_key) || 2; } else { $timeout = $timeout_key; From 11e3cdca95ad8e0621549af6a2320de24d3a6f9f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 4 Sep 2012 23:21:40 +0000 Subject: [PATCH 379/405] ProcManager: SetAsChild drops inherited IPC sockets Workers only need to inherit the minimum amount necessary from the parent ProcManager. Keeping the socket of unrelated workers in each worker is wasteful and may contribute to premature resource exhaustion. Additionally, we will be using Danga::Socket in more (possibly all) workers, not just the Monitor and Reaper. Resetting in workers that do not use Danga::Socket is harmless and will not allocate epoll/kqueue descriptors until the worker actually uses Danga::Socket. --- lib/MogileFS/ProcManager.pm | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/MogileFS/ProcManager.pm b/lib/MogileFS/ProcManager.pm index 032cb561..44f411ce 100644 --- a/lib/MogileFS/ProcManager.pm +++ b/lib/MogileFS/ProcManager.pm @@ -362,6 +362,10 @@ sub SetAsChild { %ErrorsTo = (); %idle_workers = (); %pending_work = (); + %ChildrenByJob = (); + %child = (); + %todie = (); + %jobs = (); # we just forked from our parent process, also using Danga::Socket, # so we need to lose all that state and start afresh. 
From 4dd1d6f2c713aefadee746e72279e618a57c689d Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 2 Aug 2013 18:35:24 +0000 Subject: [PATCH 380/405] monitor: remove misleading error message for timeout The timeout we're removing includes time spent in the queue waiting to even start, so reporting it in the syslog is confusing, especially since we already log the timeout via Connection::Poolable This avoids a confusing sequence of error messages like the following: [monitor(666)] node_timeout: 2 (elapsed: 2.00099802017212): GET http://127.0.0.1:7500/dev666/usage [monitor(666)] Timeout contacting 127.0.0.1 dev 666 (http://127.0.0.1:7500/dev666/usage): took 2.25 seconds out of 2 allowed Now, we only display the first message. --- lib/MogileFS/Worker/Monitor.pm | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index 3062cdbd..74f6d761 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -575,9 +575,6 @@ sub on_usage_response { $self->state_event('host', $dev->hostid, $event); } $self->{skip_host}{$dev->hostid} = 1; - my $timeout = MogileFS->config("node_timeout"); - my $devid = $dev->id; - error("Timeout contacting $hostip dev $devid ($url): took $failed_after seconds out of $timeout allowed"); } return 0; # failure } From 7969ce468bb9400bf3141da8e20671a0ab0165e5 Mon Sep 17 00:00:00 2001 From: dormando Date: Sun, 18 Aug 2013 20:00:24 -0700 Subject: [PATCH 381/405] Checking in changes prior to tagging of version 2.70. Changelog diff is: diff --git a/CHANGES b/CHANGES index b74f7f4..a6b2872 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,26 @@ +2013-08-18: Release version 2.70 + + * This release features a very large rewrite to the Monitor worker to run + checks in parallel. There are no DB schema changes. 
+ + * replicate: use persistent connection from pool if possible (Eric Wong ) + + * replicate: enforce expected Content-Length in http_copy (Eric Wong ) + + * create_open: parallelize directory vivification (Eric Wong ) + + * device: reuse HTTP connections for MKCOL (Eric Wong ) + + * delete worker uses persistent HTTP connections (Eric Wong ) + + * httpfile: use HTTP connection pool for DELETE (Eric Wong ) + + * httpfile: use Net::HTTP::NB, remove LWP::UserAgent (Eric Wong ) + + * fsck: parallelize size checks for any given FID (Eric Wong ) + + * monitor: refactor/rewrite to use new async API (Eric Wong ) + 2013-08-07: Release version 2.68 * optimize monitor worker for large installs (Eric Wong ) --- CHANGES | 23 +++++++++++++++++++++++ lib/MogileFS/Server.pm | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index b74f7f4e..a6b28726 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,26 @@ +2013-08-18: Release version 2.70 + + * This release features a very large rewrite to the Monitor worker to run + checks in parallel. There are no DB schema changes. 
+ + * replicate: use persistent connection from pool if possible (Eric Wong ) + + * replicate: enforce expected Content-Length in http_copy (Eric Wong ) + + * create_open: parallelize directory vivification (Eric Wong ) + + * device: reuse HTTP connections for MKCOL (Eric Wong ) + + * delete worker uses persistent HTTP connections (Eric Wong ) + + * httpfile: use HTTP connection pool for DELETE (Eric Wong ) + + * httpfile: use Net::HTTP::NB, remove LWP::UserAgent (Eric Wong ) + + * fsck: parallelize size checks for any given FID (Eric Wong ) + + * monitor: refactor/rewrite to use new async API (Eric Wong ) + 2013-08-07: Release version 2.68 * optimize monitor worker for large installs (Eric Wong ) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index ddb4e3de..75bb89ab 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.68"; +$VERSION = "2.70"; =head1 NAME From 202bbef42b78711784f09ae76d8956fad3761834 Mon Sep 17 00:00:00 2001 From: dormando Date: Sun, 14 Dec 2014 20:10:11 -0800 Subject: [PATCH 382/405] add LICENSE file to distro Clarified by Brad Fitzpatrick --- MANIFEST | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST b/MANIFEST index bc8700c6..28a075be 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,4 +1,5 @@ CHANGES +LICENSE examples/testapp/testapp-perlbal.conf examples/testapp/testapp.psgi examples/testapp/README From b7aff323aab521fb22f2e341c6c8fc0f2cdf3b7e Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 25 Sep 2012 20:23:38 +0000 Subject: [PATCH 383/405] host: add "readonly" state to override device "alive" state Marking an entire host as "readonly" before a host maintenance window can useful and easier than marking each device "readonly" and reduces the likelyhood a device will be incorrectly marked as "alive" again when it is intended to stay down. 
--- lib/MogileFS/Device.pm | 4 ++-- lib/MogileFS/Host.pm | 10 +++++++++- lib/MogileFS/Store.pm | 3 ++- lib/MogileFS/Store/MySQL.pm | 7 +++++++ lib/MogileFS/Store/Postgres.pm | 7 +++++++ lib/MogileFS/Store/SQLite.pm | 1 + t/02-host-device.t | 5 +++++ 7 files changed, 33 insertions(+), 4 deletions(-) diff --git a/lib/MogileFS/Device.pm b/lib/MogileFS/Device.pm index 2ea1cd9b..0b0e58e2 100644 --- a/lib/MogileFS/Device.pm +++ b/lib/MogileFS/Device.pm @@ -122,12 +122,12 @@ sub dstate { } sub can_delete_from { - return $_[0]->dstate->can_delete_from; + return $_[0]->host->alive && $_[0]->dstate->can_delete_from; } # this method is for Monitor, other workers should use should_read_from sub can_read_from { - return $_[0]->host->alive && $_[0]->dstate->can_read_from; + return $_[0]->host->should_read_from && $_[0]->dstate->can_read_from; } # this is the only method a worker should call for checking for readability diff --git a/lib/MogileFS/Host.pm b/lib/MogileFS/Host.pm index 2a119cf2..62f7cc3a 100644 --- a/lib/MogileFS/Host.pm +++ b/lib/MogileFS/Host.pm @@ -36,7 +36,7 @@ sub new_from_args { sub valid_state { my ($class, $state) = @_; - return $state && $state =~ /\A(?:alive|dead|down)\z/; + return $state && $state =~ /\A(?:alive|dead|down|readonly)\z/; } # Instance methods: @@ -77,6 +77,14 @@ sub alive { return $_[0]->status eq 'alive'; } +sub readonly { + return $_[0]->status eq 'readonly'; +} + +sub should_read_from { + return $_[0]->alive || $_[0]->readonly; +} + sub observed_reachable { my $self = shift; return $self->{observed_state} && $self->{observed_state} eq 'reachable'; diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index c6a7cf74..62edfee3 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -20,7 +20,8 @@ use List::Util qw(shuffle); # also adds a TEXT 'arg' column to file_to_queue for passing arguments # 14: modifies 'device' mb_total, mb_used to INT for devs > 16TB # 15: adds checksum table, adds 'hashtype' column to 'class' table 
-use constant SCHEMA_VERSION => 15; +# 16: adds 'readonly' state to enum in host table +use constant SCHEMA_VERSION => 16; sub new { my ($class) = @_; diff --git a/lib/MogileFS/Store/MySQL.pm b/lib/MogileFS/Store/MySQL.pm index 32a5a8b5..d985f0e1 100644 --- a/lib/MogileFS/Store/MySQL.pm +++ b/lib/MogileFS/Store/MySQL.pm @@ -396,6 +396,13 @@ sub upgrade_modify_device_size { } } +sub upgrade_add_host_readonly { + my $self = shift; + unless ($self->column_type("host", "status") =~ /\breadonly\b/) { + $self->dowell("ALTER TABLE host MODIFY COLUMN status ENUM('alive', 'dead', 'down', 'readonly')"); + } +} + sub pre_daemonize_checks { my $self = shift; # Jay Buffington, from the mailing lists, writes: diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index b057b967..54a4a8f7 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -292,6 +292,13 @@ sub upgrade_add_device_drain { } } +sub upgrade_add_host_readonly { + my $self = shift; + unless ($self->column_constraint("host", "status") =~ /\breadonly\b/) { + $self->dowell("ALTER TABLE host MODIFY COLUMN status VARCHAR(8) CHECK(status IN ('alive', 'dead', 'down', 'readonly'))"); + } +} + sub upgrade_modify_server_settings_value { my $self = shift; unless ($self->column_type("server_settings", "value" =~ /text/i)) { diff --git a/lib/MogileFS/Store/SQLite.pm b/lib/MogileFS/Store/SQLite.pm index 157d3776..ff6281cf 100644 --- a/lib/MogileFS/Store/SQLite.pm +++ b/lib/MogileFS/Store/SQLite.pm @@ -363,6 +363,7 @@ sub upgrade_add_device_drain { sub upgrade_modify_server_settings_value { 1 } sub upgrade_add_file_to_queue_arg { 1 } sub upgrade_modify_device_size { 1 } +sub upgrade_add_host_readonly { 1 } sub BLOB_BIND_TYPE { SQL_BLOB } diff --git a/t/02-host-device.t b/t/02-host-device.t index 8e29c4c8..e95c83c1 100644 --- a/t/02-host-device.t +++ b/t/02-host-device.t @@ -62,8 +62,13 @@ observed_state => 'writeable'}); ok(!$dev->can_read_from, "can_read_from for device fails 
when host is $s"); ok(!$dev->should_read_from, "device should not be readable when host is $s"); } + $host->{status} = "readonly"; + ok($dev->can_read_from, "device is readable from again"); + ok(! $dev->should_get_new_files, "device should not get new files"); + $host->{status} = "alive"; ok($dev->can_read_from, "device is readable from again"); + ok($dev->should_get_new_files, "device should get new files again"); } # first ensure device status is respected From 569cbb5d9fdb48d0bc96b01d164f22607620dfdd Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 26 Sep 2013 00:51:25 +0000 Subject: [PATCH 384/405] enable TCP keepalives for iostat watcher sockets This allows the monitor to eventually notice a client socket is totally gone if a machine death was not detected earlier. We enable TCP keepalive everywhere else, too. --- lib/MogileFS/IOStatWatcher.pm | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/MogileFS/IOStatWatcher.pm b/lib/MogileFS/IOStatWatcher.pm index b6502aa8..3078cd14 100644 --- a/lib/MogileFS/IOStatWatcher.pm +++ b/lib/MogileFS/IOStatWatcher.pm @@ -97,6 +97,7 @@ sub got_disconnect { # Support class that does the communication with individual hosts. package MogileFS::IOStatWatch::Client; +use Socket qw(SO_KEEPALIVE); use strict; use warnings; @@ -117,6 +118,7 @@ sub new { ); return unless $sock; + $sock->sockopt(SO_KEEPALIVE, 1); $self = fields::new($self) unless ref $self; $self->SUPER::new($sock); $self->watch_write(1); From 8593b0e4fe82c7b8b0a190ccf7366a8fbd2b0505 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 14 Oct 2013 19:52:04 +0000 Subject: [PATCH 385/405] add conn_pool_size configuration option This defines the size of the HTTP connection pool. This affects all workers at the moment, but is likely most interesting to the Monitor as it affects the number of devices the monitor may concurrently update. This defaults to 20 (the long-existing, hard-coded value). 
In the future, there may be a(n easy) way to specify this on a a per-worker basis, but for now it affects all workers. --- lib/MogileFS/Config.pm | 3 +++ lib/MogileFS/Host.pm | 5 ++++- t/http.t | 7 +++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Config.pm b/lib/MogileFS/Config.pm index 3e3b4d19..4e3e6691 100644 --- a/lib/MogileFS/Config.pm +++ b/lib/MogileFS/Config.pm @@ -68,6 +68,7 @@ our ( $max_disk_age, $node_timeout, # time in seconds to wait for storage node responses $conn_timeout, # time in seconds to wait for connection to storage node + $conn_pool_size, # size of the HTTP connection pool $pidfile, $repl_use_get_port, $local_network, @@ -99,6 +100,7 @@ sub load_config { 'default_mindevcount=i' => \$cmdline{default_mindevcount}, 'node_timeout=i' => \$cmdline{node_timeout}, 'conn_timeout=i' => \$cmdline{conn_timeout}, + 'conn_pool_size=i' => \$cmdline{conn_pool_size}, 'max_handles=i' => \$cmdline{max_handles}, 'pidfile=s' => \$cmdline{pidfile}, 'no_schema_check' => \$cmdline{no_schema_check}, @@ -168,6 +170,7 @@ sub load_config { choose_value( 'default_mindevcount', 2 ); $node_timeout = choose_value( 'node_timeout', 2 ); $conn_timeout = choose_value( 'conn_timeout', 2 ); + $conn_pool_size = choose_value( 'conn_pool_size', 20 ); choose_value( 'rebalance_ignore_missing', 0 ); $repl_use_get_port = choose_value( 'repl_use_get_port', 0 ); diff --git a/lib/MogileFS/Host.pm b/lib/MogileFS/Host.pm index 62f7cc3a..76c1c5a0 100644 --- a/lib/MogileFS/Host.pm +++ b/lib/MogileFS/Host.pm @@ -162,8 +162,11 @@ sub http { # FIXME - make these customizable sub _init_pools { return if $http_pool; + my $opts = { + total_capacity => MogileFS->config("conn_pool_size"), + }; - $http_pool = MogileFS::ConnectionPool->new("MogileFS::Connection::HTTP"); + $http_pool = MogileFS::ConnectionPool->new("MogileFS::Connection::HTTP", $opts); } 1; diff --git a/t/http.t b/t/http.t index 22c40443..92a30183 100644 --- a/t/http.t +++ b/t/http.t @@ -32,11 +32,18 
@@ my $host_args = { http_get_port => $http_get->sockport, }; my $host = MogileFS::Host->new_from_args($host_args); + +# required, defaults to 20 in normal server +MogileFS::Config->set_config("conn_pool_size", 13); + MogileFS::Host->_init_pools; + my $idle_pool = $MogileFS::Host::http_pool->{idle}; is("MogileFS::Host", ref($host), "host created"); MogileFS::Config->set_config("node_timeout", 1); +is(13, $MogileFS::Host::http_pool->{total_capacity}, "conn_pool_size took effect"); + # hit the http_get_port { my $resp; From fcd13ab5e6484e8a871ca4dbb859135102e773b8 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 18 Nov 2013 19:55:22 +0000 Subject: [PATCH 386/405] connection/poolable: do not write before event_write Blindly attempting to write to a socket before a TCP connection can be established returns EAGAIN on Linux, but not on FreeBSD 8/9. This causes Danga::Socket to error out, as it won't attempt to buffer on anything but EAGAIN on write() attempts. Now, we buffer writes explicitly after the initial socket creation and connect(), and only call Danga::Socket::write when we've established writability. This works on Linux, too, and avoids an unnecessary syscall in most cases. 
Reported-by: Alex Yakovenko --- lib/MogileFS/Connection/Poolable.pm | 35 +++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/lib/MogileFS/Connection/Poolable.pm b/lib/MogileFS/Connection/Poolable.pm index 16704ff5..e59d1443 100644 --- a/lib/MogileFS/Connection/Poolable.pm +++ b/lib/MogileFS/Connection/Poolable.pm @@ -13,6 +13,7 @@ use fields ( 'mfs_expire_cb', # Danga::Socket::Timer callback 'mfs_requests', # number of requests made on this object 'mfs_err', # used to propagate an error to start() + 'mfs_writeq', # arrayref if connecting, undef otherwise ); use Socket qw(SO_KEEPALIVE); use Time::HiRes; @@ -27,6 +28,9 @@ sub new { $self->{mfs_hostport} = [ $ip, $port ]; $self->{mfs_requests} = 0; + # newly-created socket, we buffer writes until event_write is triggered + $self->{mfs_writeq} = []; + return $self; } @@ -53,6 +57,37 @@ sub mark_idle { $self->{mfs_requests}++; } +sub write { + my ($self, $arg) = @_; + my $writeq = $self->{mfs_writeq}; + + if (ref($writeq) eq "ARRAY") { + # if we're still connecting, we must buffer explicitly for *BSD + # and not attempt a real write() until event_write is triggered + push @$writeq, $arg; + $self->watch_write(1); # enable event_write triggering + 0; # match Danga::Socket::write return value + } else { + $self->SUPER::write($arg); + } +} + +# Danga::Socket will trigger this when a socket is writable +sub event_write { + my ($self) = @_; + + # we may have buffered writes in mfs_writeq during non-blocking connect(), + # this is needed on *BSD but unnecessary (but harmless) on Linux. + my $writeq = delete $self->{mfs_writeq}; + if ($writeq) { + foreach my $queued (@$writeq) { + $self->write($queued); + } + } else { + $self->SUPER::event_write(); + } +} + # the request running on this connection is retryable if this socket # has ever been marked idle. 
The connection pool can never be 100% # reliable for detecting dead sockets, and all HTTP requests made by From 828ed8e00cf242f0ac1e42449d011563be29e0b9 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 18 Nov 2013 22:16:48 +0000 Subject: [PATCH 387/405] connection/poolable: disable watch_write before retrying write Otherwise we'll end up constantly waking up when there's nothing to write. --- lib/MogileFS/Connection/Poolable.pm | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/MogileFS/Connection/Poolable.pm b/lib/MogileFS/Connection/Poolable.pm index e59d1443..c9ee84a2 100644 --- a/lib/MogileFS/Connection/Poolable.pm +++ b/lib/MogileFS/Connection/Poolable.pm @@ -80,6 +80,7 @@ sub event_write { # this is needed on *BSD but unnecessary (but harmless) on Linux. my $writeq = delete $self->{mfs_writeq}; if ($writeq) { + $self->watch_write(0); # ->write will re-enable if needed foreach my $queued (@$writeq) { $self->write($queued); } From fef1fe7dcd44d17e5c8e3c0fe3c2698a8aac038e Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 1 Feb 2014 02:07:50 +0000 Subject: [PATCH 388/405] connection/poolable: defer expiry of timed out connections The timeout check may run on a socket before epoll_wait/kevent has a chance to run, giving the application no chance for any readiness callbacks to fire. This prevents timeouts in the monitor if the database is slow during synchronous UPDATE device calls (or there are just thousands of active connections). 
--- lib/MogileFS/Connection/Poolable.pm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Connection/Poolable.pm b/lib/MogileFS/Connection/Poolable.pm index c9ee84a2..86b99d48 100644 --- a/lib/MogileFS/Connection/Poolable.pm +++ b/lib/MogileFS/Connection/Poolable.pm @@ -105,6 +105,7 @@ sub set_timeout { my ($self, $timeout_key) = @_; my $mfs_pool = $self->{mfs_pool}; + $self->SetPostLoopCallback(undef); if ($timeout_key) { my $timeout; @@ -144,7 +145,7 @@ sub expired { if ($now >= $expire) { my $expire_cb = delete $self->{mfs_expire_cb}; if ($expire_cb && $self->sock) { - $expire_cb->($now); + $self->SetPostLoopCallback(sub { $expire_cb->($now); 1 }); } return 1; } From e654401531f82363091af536c3460d84739c61bd Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 1 Feb 2014 08:33:23 +0000 Subject: [PATCH 389/405] monitor: defer DB updates until all HTTP requests are done HTTP requests time out because we had to wait synchronously for DBI, this is very noticeable on a high-latency connection. So avoid running synchronous code while asynchronous code (which is subject to timeouts) is running.. --- lib/MogileFS/Store.pm | 9 ++++++--- lib/MogileFS/Worker/Monitor.pm | 28 +++++++++++++++++++--------- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 62edfee3..a8179c40 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1219,10 +1219,13 @@ sub update_device { sub update_device_usage { my $self = shift; - my %arg = $self->_valid_params([qw(mb_total mb_used devid)], @_); + my %arg = $self->_valid_params([qw(mb_total mb_used devid mb_asof)], @_); eval { - $self->dbh->do("UPDATE device SET mb_total = ?, mb_used = ?, mb_asof = " . $self->unix_timestamp . - " WHERE devid = ?", undef, $arg{mb_total}, $arg{mb_used}, $arg{devid}); + $self->dbh->do("UPDATE device SET ". + "mb_total = ?, mb_used = ?, mb_asof = ?" . 
+ " WHERE devid = ?", + undef, $arg{mb_total}, $arg{mb_used}, $arg{mb_asof}, + $arg{devid}); }; $self->condthrow; } diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index 74f6d761..ecb8419a 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -20,6 +20,7 @@ use fields ( 'db_monitor_ran', # We announce "monitor_just_ran" every time the # device checks are run, but only if the DB has # been checked inbetween. + 'devs_to_update' # device table update queue ); use Danga::Socket 1.56; @@ -105,6 +106,7 @@ sub usage_refresh { # Fetch the freshlist list of entries, to avoid excessive writes. $self->{updateable_devices} = { map { $_->{devid} => $_ } Mgd::get_store()->get_all_devices }; + $self->{devs_to_update} = []; } else { $self->{updateable_devices} = undef; } @@ -133,11 +135,6 @@ sub usage_refresh { sub usage_refresh_done { my ($self) = @_; - if ($self->{updateable_devices}) { - Mgd::get_store()->release_lock('mgfs:device_update'); - $self->{updateable_devices} = undef; - } - $self->{devutil}->{prev} = $self->{devutil}->{tmp}; # Set the IOWatcher hosts (once old monitor code has been disabled) @@ -179,6 +176,16 @@ sub usage_refresh_done { $self->send_to_parent(":monitor_just_ran"); } } + + if ($self->{updateable_devices}) { + my $sto = Mgd::get_store(); + my $updates = delete $self->{devs_to_update}; + foreach my $upd (@$updates) { + $sto->update_device_usage(%$upd); + } + $sto->release_lock('mgfs:device_update'); + $self->{updateable_devices} = undef; + } } sub work { @@ -410,10 +417,13 @@ sub check_usage_response { if ($self->{updateable_devices}) { my $devrow = $self->{updateable_devices}->{$devid}; my $last = ($devrow && $devrow->{mb_asof}) ? 
$devrow->{mb_asof} : 0; - if ($last + UPDATE_DB_EVERY < time()) { - Mgd::get_store()->update_device_usage(mb_total => int($total / 1024), - mb_used => int($used / 1024), - devid => $devid); + my $now = time(); + if ($last + UPDATE_DB_EVERY < $now) { + my %upd = (mb_total => int($total / 1024), + mb_used => int($used / 1024), + mb_asof => $now, + devid => $devid); + push @{$self->{devs_to_update}}, \%upd; } } return 1; From b0f05b75eabbfdb8114aa6369520defdaf614141 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 1 Feb 2014 08:49:10 +0000 Subject: [PATCH 390/405] monitor: ping parent during deferred DB updates With enough devices and high enough network latency to the DB, we bump into the watchdog timeout of 30s easily. --- lib/MogileFS/Worker/Monitor.pm | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index ecb8419a..819ca933 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -182,6 +182,7 @@ sub usage_refresh_done { my $updates = delete $self->{devs_to_update}; foreach my $upd (@$updates) { $sto->update_device_usage(%$upd); + $self->still_alive; } $sto->release_lock('mgfs:device_update'); $self->{updateable_devices} = undef; From 7b6fe2ebc692e6936b11612c756297f2c35a1616 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 6 Feb 2014 02:47:03 +0000 Subject: [PATCH 391/405] monitor: batch MySQL device table updates Issuing many UPDATE statements slow down monitoring on high latency connections between the monitor and DB. Under MySQL, it is possible to do multiple UPDATEs in a single statement using CASE/WHEN syntax. We limit ourselves to 10000 devices per update for now, this should keep us comfortably under most the max_allowed_packet size of most MySQL deployments (where the default is 1M). A compatibility function is provided for SQLite and Postgres users. 
SQLite users are not expected to run this over high-latency NFS, and interested Postgres users should submit their own implementation. --- lib/MogileFS/Store.pm | 9 +++++++++ lib/MogileFS/Store/MySQL.pm | 25 +++++++++++++++++++++++++ lib/MogileFS/Worker/Monitor.pm | 5 +---- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index a8179c40..868e7d06 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1230,6 +1230,15 @@ sub update_device_usage { $self->condthrow; } +# MySQL has an optimized version +sub update_device_usages { + my ($self, $updates, $cb) = @_; + foreach my $upd (@$updates) { + $self->update_device_usage(%$upd); + $cb->(); + } +} + # This is unimplemented at the moment as we must verify: # - no file_on rows exist # - nothing in file_to_queue is going to attempt to use it diff --git a/lib/MogileFS/Store/MySQL.pm b/lib/MogileFS/Store/MySQL.pm index d985f0e1..c0f5a3dc 100644 --- a/lib/MogileFS/Store/MySQL.pm +++ b/lib/MogileFS/Store/MySQL.pm @@ -444,6 +444,31 @@ sub get_keys_like_operator { return $bool ? "LIKE /*! BINARY */" : "LIKE"; } +sub update_device_usages { + my ($self, $updates, $cb) = @_; + $cb->(); + my $chunk = 10000; # in case we hit max_allowed_packet size(!) + while (scalar @$updates) { + my @cur = splice(@$updates, 0, $chunk); + my @set; + foreach my $fld (qw(mb_total mb_used mb_asof)) { + my $s = "$fld = CASE devid\n"; + foreach my $upd (@cur) { + my $devid = $upd->{devid}; + defined($devid) or croak("devid not set\n"); + my $val = $upd->{$fld}; + defined($val) or croak("$fld not defined for $devid\n"); + $s .= "WHEN $devid THEN $val\n"; + } + $s .= "ELSE $fld END"; + push @set, $s; + } + my $sql = "UPDATE device SET ". 
join(",\n", @set); + $self->dowell($sql); + $cb->(); + } +} + 1; __END__ diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index 819ca933..912822d4 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -180,10 +180,7 @@ sub usage_refresh_done { if ($self->{updateable_devices}) { my $sto = Mgd::get_store(); my $updates = delete $self->{devs_to_update}; - foreach my $upd (@$updates) { - $sto->update_device_usage(%$upd); - $self->still_alive; - } + $sto->update_device_usages($updates, sub { $self->still_alive }); $sto->release_lock('mgfs:device_update'); $self->{updateable_devices} = undef; } From 196e92889f6c52278907eeafd95bdea4ab49f298 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 12 Nov 2014 01:31:28 +0000 Subject: [PATCH 392/405] remove users of unreachable_fids table mark_fidid_unreachable has not been used since MogileFS 2.35 commit 53528c71ca95a04cf031c39848c38b47389d039f ("Wipe out old replication code.", r1432) --- lib/MogileFS/FID.pm | 6 ------ lib/MogileFS/Store.pm | 7 ------- lib/MogileFS/Store/Postgres.pm | 14 -------------- 3 files changed, 27 deletions(-) diff --git a/lib/MogileFS/FID.pm b/lib/MogileFS/FID.pm index 0e5337b7..627c45e4 100644 --- a/lib/MogileFS/FID.pm +++ b/lib/MogileFS/FID.pm @@ -151,12 +151,6 @@ sub enqueue_for_replication { Mgd::get_store()->enqueue_for_replication($self->id, $from_devid, $in); } -sub mark_unreachable { - my $self = shift; - # update database table - Mgd::get_store()->mark_fidid_unreachable($self->id); -} - sub delete { my $fid = shift; my $sto = Mgd::get_store(); diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index 868e7d06..d266a0c7 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1252,13 +1252,6 @@ sub delete_device { die "Unimplemented; needs further testing"; } -sub mark_fidid_unreachable { - my ($self, $fidid) = @_; - die "Your database does not support REPLACE! Reimplement mark_fidid_unreachable!" 
unless $self->can_replace; - $self->dbh->do("REPLACE INTO unreachable_fids VALUES (?, " . $self->unix_timestamp . ")", - undef, $fidid); -} - sub set_device_weight { my ($self, $devid, $weight) = @_; eval { diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index 54a4a8f7..24197163 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -701,20 +701,6 @@ sub create_device { return 1; } -sub mark_fidid_unreachable { - my ($self, $fidid) = @_; - my $dbh = $self->dbh; - - eval { - $self->insert_or_update( - insert => "INSERT INTO unreachable_fids (fid, lastupdate) VALUES (?, ".$self->unix_timestamp.")", - insert_vals => [ $fidid ], - update => "UPDATE unreachable_fids SET lastupdate = ".$self->unix_timestamp." WHERE field = ?", - update_vals => [ $fidid ], - ); - }; -} - sub replace_into_file { my $self = shift; my %arg = $self->_valid_params([qw(fidid dmid key length classid devcount)], @_); From a8dbbea8d7c5929df8785b40b5034cbd7e5bc311 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 12 Nov 2014 07:51:59 +0000 Subject: [PATCH 393/405] remove update_host_property No longer used since commit ebf8a5a8dc9b4452671f7816b99583bf7934e9b1 ("Mass nuke unused code and fix most tests") in MogileFS 2.50 --- lib/MogileFS/Store.pm | 8 -------- 1 file changed, 8 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index d266a0c7..e45eccc6 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1671,14 +1671,6 @@ sub update_host { return 1; } -sub update_host_property { - my ($self, $hostid, $col, $val) = @_; - $self->conddup(sub { - $self->dbh->do("UPDATE host SET $col=? WHERE hostid=?", undef, $val, $hostid); - }); - return 1; -} - # return ne hostid, or throw 'dup' on error. # NOTE: you need to put them into the initial 'down' state. 
sub create_host { From d0ee2a27253ade4b50ef5b91471f4f520a67bf8c Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 15 Dec 2014 22:57:49 -0800 Subject: [PATCH 394/405] Work with DBD::SQLite's latest lock errors "is not unique" => "UNIQUE constraint failed". String matching is lovely. --- lib/MogileFS/Store/SQLite.pm | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/MogileFS/Store/SQLite.pm b/lib/MogileFS/Store/SQLite.pm index ff6281cf..70f1425c 100644 --- a/lib/MogileFS/Store/SQLite.pm +++ b/lib/MogileFS/Store/SQLite.pm @@ -174,6 +174,7 @@ sub was_duplicate_error { my $errstr = $dbh->errstr; return 1 if $errstr =~ /(?:is|are) not unique/i; return 1 if $errstr =~ /must be unique/i; + return 1 if $errstr =~ /UNIQUE constraint failed/i; return 0; } From 6832b1b8514e2967c778bdfecec9f5f4b65567b8 Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 15 Dec 2014 23:02:34 -0800 Subject: [PATCH 395/405] Checking in changes prior to tagging of version 2.72. Changelog diff is: diff --git a/CHANGES b/CHANGES index a6b2872..441b328 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,29 @@ +2014-12-15: Release version 2.72 + + * Work with DBD::SQLite's latest lock errors (dormando ) + + * remove update_host_property (Eric Wong ) + + * remove users of unreachable_fids table (Eric Wong ) + + * monitor: batch MySQL device table updates (Eric Wong ) + + * monitor: defer DB updates until all HTTP requests are done (Eric Wong ) + + * connection/poolable: defer expiry of timed out connections (Eric Wong ) + + * connection/poolable: disable watch_write before retrying write (Eric Wong ) + + * connection/poolable: do not write before event_write (Eric Wong ) + + * add conn_pool_size configuration option (Eric Wong ) + + * enable TCP keepalives for iostat watcher sockets (Eric Wong ) + + * host: add "readonly" state to override device "alive" state (Eric Wong ) + + * add LICENSE file to distro (dormando ) + 2013-08-18: Release version 2.70 * This release features a very large rewrite to the 
Monitor worker to run --- CHANGES | 26 ++++++++++++++++++++++++++ LICENSE | 1 + lib/MogileFS/Server.pm | 2 +- 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 LICENSE diff --git a/CHANGES b/CHANGES index a6b28726..441b328a 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,29 @@ +2014-12-15: Release version 2.72 + + * Work with DBD::SQLite's latest lock errors (dormando ) + + * remove update_host_property (Eric Wong ) + + * remove users of unreachable_fids table (Eric Wong ) + + * monitor: batch MySQL device table updates (Eric Wong ) + + * monitor: defer DB updates until all HTTP requests are done (Eric Wong ) + + * connection/poolable: defer expiry of timed out connections (Eric Wong ) + + * connection/poolable: disable watch_write before retrying write (Eric Wong ) + + * connection/poolable: do not write before event_write (Eric Wong ) + + * add conn_pool_size configuration option (Eric Wong ) + + * enable TCP keepalives for iostat watcher sockets (Eric Wong ) + + * host: add "readonly" state to override device "alive" state (Eric Wong ) + + * add LICENSE file to distro (dormando ) + 2013-08-18: Release version 2.70 * This release features a very large rewrite to the Monitor worker to run diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..86ae4f48 --- /dev/null +++ b/LICENSE @@ -0,0 +1 @@ +License granted to use/distribute under the same terms as Perl itself. 
diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 75bb89ab..e1afa047 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.70"; +$VERSION = "2.72"; =head1 NAME From f9c9d68d6f6c249b9bdc2a4d3ec156515e6ae8b4 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 15 Apr 2015 01:44:00 +0000 Subject: [PATCH 396/405] replicate: reduce backoff for too_happy FIDs Due to a bug the MultipleNetworks replication policy <20150415233844.GA3878@dcvr.yhbt.net>, a network split caused an instance to explode with overreplicated files. Since every too_happy pruning increases failcount, it could end up taking days due to clean up a file with far too many replicas. --- lib/MogileFS/Worker/Replicate.pm | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index f539710e..6687fb29 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -172,7 +172,13 @@ sub replicate_using_torepl_table { last; } } - $self->rebalance_devfid($devfid) if $devfid; + if ($devfid) { + if ($self->rebalance_devfid($devfid)) { + # disable exponential backoff below if we rebalanced due to + # excessive replication: + $todo->{failcount} = 0; + } + } } # at this point, the rest of the errors require exponential backoff. define what this means @@ -184,7 +190,7 @@ sub replicate_using_torepl_table { return 1; } -# Return 1 on success, 0 on failure. +# Return 1 on success, 0 on failure or no-op. sub rebalance_devfid { my ($self, $devfid, $opts) = @_; $opts ||= {}; @@ -273,7 +279,7 @@ sub rebalance_devfid { } $unlock->(); - return 1; + return $should_delete; } # replicates $fid to make sure it meets its class' replicate policy. 
From e2bd3ffc978442a52eb06f185d2ed31d5b439e66 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 12 Jun 2015 00:41:39 +0000 Subject: [PATCH 397/405] enable DB upgrade for host readonly state The readonly host state was not enabled via mogdbsetup and could not be used although the code supports it, making the schema version bump to 16 a no-op. This bumps the schema version to 17. Add a test using mogadm to ensure the setting is changeable, as the existing test for this state did not rely on the database. This was also completely broken with Postgres before, as Postgres currently offers no way to modify constraints in-place. Constraints must be dropped and re-added instead. Note: it seems the upgrade_add_device_* functions in Postgres.pm are untested as well and never got used. Perhaps they ought to be removed entirely since those device columns predate Postgres support. --- lib/MogileFS/Store.pm | 6 ++++-- lib/MogileFS/Store/Postgres.pm | 14 ++++++++++---- t/00-startup.t | 2 ++ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index e45eccc6..c16aec14 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -20,8 +20,9 @@ use List::Util qw(shuffle); # also adds a TEXT 'arg' column to file_to_queue for passing arguments # 14: modifies 'device' mb_total, mb_used to INT for devs > 16TB # 15: adds checksum table, adds 'hashtype' column to 'class' table -# 16: adds 'readonly' state to enum in host table -use constant SCHEMA_VERSION => 16; +# 16: no-op, see 17 +# 17: adds 'readonly' state to enum in host table +use constant SCHEMA_VERSION => 17; sub new { my ($class) = @_; @@ -543,6 +544,7 @@ sub setup_database { $sto->upgrade_add_file_to_queue_arg; $sto->upgrade_modify_device_size; $sto->upgrade_add_class_hashtype; + $sto->upgrade_add_host_readonly; return 1; } diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index 24197163..91e39c4e 100644 --- 
a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -294,8 +294,13 @@ sub upgrade_add_device_drain { sub upgrade_add_host_readonly { my $self = shift; - unless ($self->column_constraint("host", "status") =~ /\breadonly\b/) { - $self->dowell("ALTER TABLE host MODIFY COLUMN status VARCHAR(8) CHECK(status IN ('alive', 'dead', 'down', 'readonly'))"); + my $cn; + unless ($self->column_constraint("host", "status", \$cn) =~ /\breadonly\b/) { + $self->dbh->begin_work; + $self->dowell("ALTER TABLE host DROP CONSTRAINT $cn"); + $self->dowell("ALTER TABLE host ADD CONSTRAINT status CHECK(". + "status IN ('alive', 'dead', 'down', 'readonly'))"); + $self->dbh->commit; } } @@ -447,12 +452,13 @@ sub column_type { } sub column_constraint { - my ($self, $table, $col) = @_; - my $sth = $self->dbh->prepare("SELECT column_name,information_schema.check_constraints.check_clause FROM information_schema.constraint_column_usage JOIN information_schema.check_constraints USING(constraint_catalog,constraint_schema,constraint_name) WHERE table_name=? AND column_name=?"); + my ($self, $table, $col, $cn) = @_; + my $sth = $self->dbh->prepare("SELECT column_name,information_schema.check_constraints.check_clause,constraint_name FROM information_schema.constraint_column_usage JOIN information_schema.check_constraints USING(constraint_catalog,constraint_schema,constraint_name) WHERE table_name=? 
AND column_name=?"); $sth->execute($table,$col); while (my $rec = $sth->fetchrow_hashref) { if ($rec->{column_name} eq $col) { $sth->finish; + $$cn = $rec->{constraint_name} if $cn; return $rec->{check_clause}; } } diff --git a/t/00-startup.t b/t/00-startup.t index 62be86f4..666cdad6 100644 --- a/t/00-startup.t +++ b/t/00-startup.t @@ -484,4 +484,6 @@ foreach my $t (qw(file file_on file_to_delete)) { $sto->set_server_setting('case_sensitive_list_keys', undef); } +ok($tmptrack->mogadm(qw(host mark hostA readonly)), "host state=readonly"); + done_testing(); From 8b76d98f80b5ab231634f6e0253a2d71c41e02f8 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 17 Dec 2015 03:57:38 +0000 Subject: [PATCH 398/405] replicate: avoid buffered IO on reads Perl buffered IO is only reading 8K at a time (or only 4K on older versions!) despite us requesting to read in 1MB chunks. This wastes syscalls and can affect TCP window scaling when MogileFS is replicating across long fat networks (LFN). While we're at it, this fixes a long-standing FIXME item to perform proper timeouts when reading headers as we're forced to do sysread instead of line-buffered I/O. 
ref: https://rt.perl.org/Public/Bug/Display.html?id=126403 (and confirmed by strace-ing replication workers) --- lib/MogileFS/Worker/Replicate.pm | 78 ++++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 25 deletions(-) diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index f539710e..363f20f9 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -9,7 +9,7 @@ use fields ( use List::Util (); use MogileFS::Server; -use MogileFS::Util qw(error every debug); +use MogileFS::Util qw(error every debug wait_for_readability); use MogileFS::Config; use MogileFS::ReplicationRequest qw(rr_upgrade); use Digest; @@ -25,6 +25,7 @@ sub new { # replicator wants sub watchdog_timeout { 90; } +use constant SOCK_TIMEOUT => 45; sub work { my $self = shift; @@ -530,20 +531,32 @@ sub replicate { # keep => boolean, whether to keep the connection after reading # len => value of the Content-Length header (integer) # } +# Returns undef on timeout sub read_headers { - my ($sock) = @_; - my %rv = (); - # FIXME: this can block. needs to timeout. - my $line = <$sock>; - return unless defined $line; - $line =~ m!\AHTTP/(\d+\.\d+)\s+(\d+)! or return; - $rv{keep} = $1 >= 1.1; - $rv{code} = $2; + my ($sock, $intercopy_cb) = @_; + my $head = ''; - while (1) { - $line = <$sock>; - return unless defined $line; - last if $line =~ /\A\r?\n\z/; + do { + wait_for_readability(fileno($sock), SOCK_TIMEOUT) or return; + $intercopy_cb->(); + my $r = sysread($sock, $head, 1024, length($head)); + if (defined $r) { + return if $r == 0; # EOF + } elsif ($!{EAGAIN} || $!{EINTR}) { + # loop again + } else { + return; + } + } until ($head =~ /\r?\n\r?\n/); + + my $data; + ($head, $data) = split(/\r?\n\r?\n/, $head, 2); + my @head = split(/\r?\n/, $head); + $head = shift(@head); + $head =~ m!\AHTTP/(\d+\.\d+)\s+(\d+)! 
or return; + my %rv = ( keep => $1 >= 1.1, code => $2 ); + + foreach my $line (@head) { if ($line =~ /\AConnection:\s*keep-alive\s*\z/is) { $rv{keep} = 1; } elsif ($line =~ /\AConnection:\s*close\s*\z/is) { @@ -552,7 +565,7 @@ sub read_headers { $rv{len} = $1; } } - return \%rv; + return (\%rv, $data); } # copies a file from one Perlbal to another utilizing HTTP @@ -652,10 +665,10 @@ sub http_copy { # plugin set a custom host. $get .= "Host: $shttphost\r\n" if $shttphost; - my $data = ''; my ($sock, $dsock); my ($wcount, $bytes_to_read, $written, $remain); my ($stries, $dtries) = (0, 0); + my ($sres, $data, $bytes); retry: $sconn->close("retrying") if $sconn; @@ -671,7 +684,7 @@ retry: } # we just want a content length - my $sres = read_headers($sock); + ($sres, $data) = read_headers($sock, $intercopy_cb); unless ($sres) { goto retry if $sconn->retryable && $stries == 1; return $error_unreachable->("Error: Resource $surl failed to return an HTTP response"); @@ -696,18 +709,26 @@ retry: } # now read data and print while we're reading. 
+ $bytes = length($data); ($written, $remain) = (0, $clen); $bytes_to_read = 1024*1024; # read 1MB at a time until there's less than that remaining $bytes_to_read = $remain if $remain < $bytes_to_read; $wcount = 0; while ($bytes_to_read) { - my $bytes = $sock->read($data, $bytes_to_read); unless (defined $bytes) { - return $src_error->("error reading midway through source: $!"); - } - if ($bytes == 0) { - return $src_error->("EOF reading midway through source: $!"); +read_again: + $bytes = sysread($sock, $data, $bytes_to_read); + unless (defined $bytes) { + if ($!{EAGAIN} || $!{EINTR}) { + wait_for_readability(fileno($sock), SOCK_TIMEOUT) and + goto read_again; + } + return $src_error->("error reading midway through source: $!"); + } + if ($bytes == 0) { + return $src_error->("EOF reading midway through source: $!"); + } } # now we've read in $bytes bytes @@ -716,6 +737,7 @@ retry: $digest->add($data) if $digest; my $data_len = $bytes; + $bytes = undef; my $data_off = 0; while (1) { my $wbytes = syswrite($dsock, $data, $data_len, $data_off); @@ -757,7 +779,7 @@ retry: } # now read in the response line (should be first line) - my $dres = read_headers($dsock); + my ($dres, $ddata) = read_headers($dsock, $intercopy_cb); unless ($dres) { goto retry if (!$wcount && $dconn->retryable && $dtries == 1); return $dest_error->("Error: HTTP response line not recognized writing to $durl"); @@ -765,10 +787,11 @@ retry: # drain the response body if there is one # there may be no dres->{len}/Content-Length if there is no body - if ($dres->{len}) { - my $r = $dsock->read($data, $dres->{len}); # dres->{len} should be tiny + my $dlen = ($dres->{len} || 0) - length($ddata); + if ($dlen > 0) { + my $r = $dsock->read($data, $dlen); # dres->{len} should be tiny if (defined $r) { - if ($r != $dres->{len}) { + if ($r != $dlen) { Mgd::error("Failed to read $r of Content-Length:$dres->{len} bytes for PUT response on $durl"); $dres->{keep} = 0; } @@ -776,6 +799,11 @@ retry: Mgd::error("Failed 
to read Content-Length:$dres->{len} bytes for PUT response on $durl ($!)"); $dres->{keep} = 0; } + } elsif ($dlen < 0) { + Mgd::error("strange response Content-Length:$dres->{len} with ". + length($ddata) . + " extra bytes for PUT response on $durl ($!)"); + $dres->{keep} = 0; } # return the connection back to the connection pool From 2acba9e5af7d898cfb5ab89c5c8c1f79050d6632 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 13 Feb 2017 00:19:42 +0000 Subject: [PATCH 399/405] ConnectionPool: avoid undefined behavior for hash iteration Perl 5.18 stable and later (commit a7b39f85d7caac) introduced a warning for restarting `each` after hash modification. While we accounted for this undefined behavior and documented it in the past, this may still cause maintenance problems in the future despite our current workarounds being sufficient. In any case, keeping idle sockets around is cheap with modern APIs, and conn_pool_size was introduced in 2.72 to avoid dropping idle connections at all; so _conn_drop_idle may never be called on a properly configured tracker. Mailing list references: <20160114024652.GA4403@dcvr.yhbt.net> --- lib/MogileFS/ConnectionPool.pm | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/lib/MogileFS/ConnectionPool.pm b/lib/MogileFS/ConnectionPool.pm index 88d0eca1..7d4b00f3 100644 --- a/lib/MogileFS/ConnectionPool.pm +++ b/lib/MogileFS/ConnectionPool.pm @@ -397,18 +397,11 @@ sub _conn_drop_idle { my ($self) = @_; my $idle = $self->{idle}; - # using "each" on the hash since it preserves the internal iterator - # of the hash across invocations of this sub. This should preserve - # the balance of idle connections in a big pool with many hosts. 
- # Thus we loop twice to ensure we scan the entire idle connection - # pool if needed - foreach (1..2) { - while (my (undef, $val) = each %$idle) { - my $conn = shift @$val or next; - - $conn->close("idle_expire") if $conn->sock; - return; - } + foreach my $val (values %$idle) { + my $conn = shift @$val or next; + + $conn->close("idle_expire") if $conn->sock; + return; } confess("BUG: unable to drop an idle connection"); From 56b39b80740be3ce3a420aaf3b9ed67b6df4572f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 6 Apr 2017 22:18:07 +0000 Subject: [PATCH 400/405] client connection should always be nonblocking On *BSD platforms, the accept()-ed clients inherit the O_NONBLOCK file flag from the listen socket. This is not true on Linux, and I noticed sockets blocking on write() syscalls via strace. Checking the octal 04000 (O_NONBLOCK) flag in /proc/$PID/fdinfo/$FD for client TCP sockets confirms O_NONBLOCK was not set. This also makes us resilient to spurious wakeups causing event_read to get stuck, as documented in the Linux select(2) manpage. 
--- lib/MogileFS/Connection/Client.pm | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/MogileFS/Connection/Client.pm b/lib/MogileFS/Connection/Client.pm index bf58222c..528998fd 100644 --- a/lib/MogileFS/Connection/Client.pm +++ b/lib/MogileFS/Connection/Client.pm @@ -7,6 +7,7 @@ package MogileFS::Connection::Client; use strict; use Danga::Socket (); use base qw{Danga::Socket}; +use IO::Handle; use fields qw{read_buf}; @@ -14,6 +15,7 @@ sub new { my $self = shift; $self = fields::new($self) unless ref $self; $self->SUPER::new( @_ ); + IO::Handle::blocking($self->{sock}, 0); $self->watch_read(1); return $self; } From 05cdf17041374690f588a3efe381a3463a44d013 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 7 Apr 2017 00:02:18 +0000 Subject: [PATCH 401/405] tracker: client fairness, backpressure, and expiry Make client query processing less aggressive and more fair by only enqueueing a single worker request at a time. Pipelined requests in the read buffer will only be handled after successful writes, and any incomplete writes will block further request processing. Furthermore, add a watchdog for clients we're writing to expire clients which are not reading our responses. Danga::Socket allows clients to use an infinite amount of space for buffering, and it's possible for dead sockets to go undetected for hours by the OS. Use a watchdog to kick out any sockets which have made no forward progress after two minutes. 
--- lib/MogileFS/Connection/Client.pm | 77 +++++++++++++++++++++++++++++-- lib/MogileFS/ProcManager.pm | 3 ++ 2 files changed, 76 insertions(+), 4 deletions(-) diff --git a/lib/MogileFS/Connection/Client.pm b/lib/MogileFS/Connection/Client.pm index 528998fd..121da5b9 100644 --- a/lib/MogileFS/Connection/Client.pm +++ b/lib/MogileFS/Connection/Client.pm @@ -8,30 +8,98 @@ use strict; use Danga::Socket (); use base qw{Danga::Socket}; use IO::Handle; +use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC); +use MogileFS::Util qw(error); -use fields qw{read_buf}; +use fields qw{read_buf pipelined}; + +my %SLOW_WRITERS = (); +my $EXPTIME = 120; +my $PIPELINE = []; + +sub Reset { + %SLOW_WRITERS = (); + $PIPELINE = []; +} + +sub WriterWatchDog { + my $dmap = Danga::Socket->DescriptorMap; + my $old = clock_gettime(CLOCK_MONOTONIC) - $EXPTIME; + foreach my $fd (keys %SLOW_WRITERS) { + my $last_write = $SLOW_WRITERS{$fd}; + next if $last_write > $old; + + if (my $ds = $dmap->{$fd}) { + error('write timeout expired: '.$ds->as_string); + $ds->close; + } else { + error("fd=$fd not known to Danga::Socket(!?), ignoring"); + } + delete $SLOW_WRITERS{$fd}; + } +} + +sub ProcessPipelined { + my $run = $PIPELINE; + $PIPELINE = []; + foreach my MogileFS::Connection::Client $clref (@$run) { + $clref->{pipelined} = undef; + $clref->process_request or $clref->watch_read(1); + } +} sub new { my $self = shift; $self = fields::new($self) unless ref $self; $self->SUPER::new( @_ ); IO::Handle::blocking($self->{sock}, 0); + delete $SLOW_WRITERS{$self->{fd}}; $self->watch_read(1); return $self; } # Client + +sub process_request { + my MogileFS::Connection::Client $self = shift; + + while ($self->{read_buf} =~ s/^(.*?)\r?\n//) { + next unless length $1; + $self->handle_request($1); + return 1; + } + 0; +} + sub event_read { my MogileFS::Connection::Client $self = shift; my $bref = $self->read(1024); return $self->close unless defined $bref; $self->{read_buf} .= $$bref; + $self->process_request; +} - 
while ($self->{read_buf} =~ s/^(.*?)\r?\n//) { - next unless length $1; - $self->handle_request($1); +sub write { + my MogileFS::Connection::Client $self = shift; + my $done = $self->SUPER::write(@_); + my $fd = $self->{fd}; + if ($done) { + if (defined $fd) { + delete $SLOW_WRITERS{$fd}; + unless ($self->{pipelined}) { + $self->{pipelined} = 1; + push @$PIPELINE, $self; + } + } + } else { + # stop reading if we can't write, otherwise we'll OOM + if (defined $fd) { + $SLOW_WRITERS{$fd} = clock_gettime(CLOCK_MONOTONIC); + $self->watch_read(0); + } } + $done; } sub handle_request { @@ -48,6 +116,7 @@ sub handle_request { return $self->handle_admin_command($cmd, $args); } + $self->watch_read(0); MogileFS::ProcManager->EnqueueCommandRequest($line, $self); } diff --git a/lib/MogileFS/ProcManager.pm b/lib/MogileFS/ProcManager.pm index 44f411ce..2c580f90 100644 --- a/lib/MogileFS/ProcManager.pm +++ b/lib/MogileFS/ProcManager.pm @@ -147,12 +147,14 @@ sub PostEventLoopChecker { my $lastspawntime = 0; # time we last ran spawn_children sub return sub { + MogileFS::Connection::Client->ProcessPipelined; # run only once per second $nowish = time(); return 1 unless $nowish > $lastspawntime; $lastspawntime = $nowish; MogileFS::ProcManager->WatchDog; + MogileFS::Connection::Client->WriterWatchDog; # see if anybody has died, but don't hang up on doing so while(my $pid = waitpid -1, WNOHANG) { @@ -370,6 +372,7 @@ sub SetAsChild { # we just forked from our parent process, also using Danga::Socket, # so we need to lose all that state and start afresh. Danga::Socket->Reset; + MogileFS::Connection::Client->Reset; } # called when a child has died. 
a child is someone doing a job for us, From 5d18499348d31dfea1c34df759d830631518e48f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 7 Apr 2017 00:14:23 +0000 Subject: [PATCH 402/405] client: use single write for admin commands This avoids the odd case where the first write completes, but the second one (for 3 bytes: ".\r\n") does not complete, causing a client to having both read and write watchability enabled after the previous commit to stop reads when writes do not complete. This would not be fatal, but breaks the rule where clients should only be reading or writing exclusively, never doing both; as that could lead to pathological memory usage. This also reduces client wakeups and TCP overhead with TCP_NODELAY sockets by avoiding a small packet (".\r\n") after the main response. --- lib/MogileFS/Connection/Client.pm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Connection/Client.pm b/lib/MogileFS/Connection/Client.pm index 121da5b9..4e825fc5 100644 --- a/lib/MogileFS/Connection/Client.pm +++ b/lib/MogileFS/Connection/Client.pm @@ -211,8 +211,8 @@ sub handle_admin_command { MogileFS::ProcManager->SendHelp($self, $args); } - $self->write(join("\r\n", @out) . "\r\n") if @out; - $self->write(".\r\n"); + push @out, '.', ''; + $self->write(join("\r\n", @out)); return; } From 7748d36fe85375fa141fa215cb48405d045b11be Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 7 Apr 2017 05:11:52 +0000 Subject: [PATCH 403/405] client: always disable watch_read after a command Otherwise it'll be possible to pipeline admin (!) commands and event_read will trigger EOF before all the admin commands are processed in read_buf. 
--- lib/MogileFS/Connection/Client.pm | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/MogileFS/Connection/Client.pm b/lib/MogileFS/Connection/Client.pm index 4e825fc5..a4981be9 100644 --- a/lib/MogileFS/Connection/Client.pm +++ b/lib/MogileFS/Connection/Client.pm @@ -77,7 +77,9 @@ sub event_read { my $bref = $self->read(1024); return $self->close unless defined $bref; $self->{read_buf} .= $$bref; - $self->process_request; + if ($self->process_request) { + $self->watch_read(0); + } } sub write { @@ -116,7 +118,6 @@ sub handle_request { return $self->handle_admin_command($cmd, $args); } - $self->watch_read(0); MogileFS::ProcManager->EnqueueCommandRequest($line, $self); } From 2c50b8b0cbe48fcba064ca2b687e01c11c7b2601 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 7 Jun 2017 23:51:01 +0000 Subject: [PATCH 404/405] fsck: avoid infinite wait on dead devices If DevFID::size_on_disk encounters an unreadable (dead) device AND there are no HTTP requests pending; we must ensure Danga::Socket runs the PostLoopCallback to check if the event loop is complete. Do that by scheduling another timer to run immediately. --- lib/MogileFS/Worker/Fsck.pm | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index 31436e25..fc79ab9d 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -221,7 +221,12 @@ sub parallel_check_sizes { $df->size_on_disk(sub { my ($size) = @_; $done++; - $good++ if $cb->($df, $size); + if ($cb->($df, $size)) { + $good++; + } else { + # use another timer to force PostLoopCallback to run + Danga::Socket->AddTimer(0, sub { $self->still_alive }); + } }); } From 92b69140a6e96703d156bbf3901da2fef2e5aa82 Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 19 Jan 2018 13:42:46 -0800 Subject: [PATCH 405/405] Checking in changes prior to tagging of version 2.73. 
Changelog diff is: diff --git a/CHANGES b/CHANGES index 441b328..e053851 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,29 @@ +2018-01-18: Release version 2.73 + + * fsck: avoid infinite wait on dead devices (Eric Wong ) + + * client: always disable watch_read after a command (Eric Wong ) + + * client: use single write for admin commands (Eric Wong ) + + * tracker: client fairness, backpressure, and expiry (Eric Wong ) + + * client connection should always be nonblocking (Eric Wong ) + + * ConnectionPool: avoid undefined behavior for hash iteration (Eric Wong ) + + * replicate: avoid buffered IO on reads (Eric Wong ) + + * enable DB upgrade for host readonly state (Eric Wong ) + + * replicate: reduce backoff for too_happy FIDs (Eric Wong ) + + * fsck: this avoid redundant fsck log entries (Eric Wong ) + + * fsck: do not log FOND if note_on_device croaks (Eric Wong ) + + * reaper: detect resurrection of "dead" devices (Eric Wong ) + 2014-12-15: Release version 2.72 * Work with DBD::SQLite's latest lock errors (dormando ) --- CHANGES | 26 ++++++++++++++++++++++++++ lib/MogileFS/Server.pm | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 441b328a..e053851a 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,29 @@ +2018-01-18: Release version 2.73 + + * fsck: avoid infinite wait on dead devices (Eric Wong ) + + * client: always disable watch_read after a command (Eric Wong ) + + * client: use single write for admin commands (Eric Wong ) + + * tracker: client fairness, backpressure, and expiry (Eric Wong ) + + * client connection should always be nonblocking (Eric Wong ) + + * ConnectionPool: avoid undefined behavior for hash iteration (Eric Wong ) + + * replicate: avoid buffered IO on reads (Eric Wong ) + + * enable DB upgrade for host readonly state (Eric Wong ) + + * replicate: reduce backoff for too_happy FIDs (Eric Wong ) + + * fsck: this avoid redundant fsck log entries (Eric Wong ) + + * fsck: do not log FOND if note_on_device 
croaks (Eric Wong ) + + * reaper: detect resurrection of "dead" devices (Eric Wong ) + 2014-12-15: Release version 2.72 * Work with DBD::SQLite's latest lock errors (dormando ) diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index e1afa047..7520e063 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.72"; +$VERSION = "2.73"; =head1 NAME