diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..0b5bd399 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +Makefile +blib +pm_to_blib diff --git a/.shipit b/.shipit index ca301200..a797898d 100644 --- a/.shipit +++ b/.shipit @@ -1,4 +1,4 @@ steps = FindVersion, ChangeVersion, CheckChangeLog, DistTest, Commit, Tag, MakeDist, UploadCPAN -svn.tagpattern = mogilefs-server-%v +git.tagpattern = %v diff --git a/CHANGES b/CHANGES index 6d5fea9b..e053851a 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,504 @@ +2018-01-18: Release version 2.73 + + * fsck: avoid infinite wait on dead devices (Eric Wong ) + + * client: always disable watch_read after a command (Eric Wong ) + + * client: use single write for admin commands (Eric Wong ) + + * tracker: client fairness, backpressure, and expiry (Eric Wong ) + + * client connection should always be nonblocking (Eric Wong ) + + * ConnectionPool: avoid undefined behavior for hash iteration (Eric Wong ) + + * replicate: avoid buffered IO on reads (Eric Wong ) + + * enable DB upgrade for host readonly state (Eric Wong ) + + * replicate: reduce backoff for too_happy FIDs (Eric Wong ) + + * fsck: this avoid redundant fsck log entries (Eric Wong ) + + * fsck: do not log FOND if note_on_device croaks (Eric Wong ) + + * reaper: detect resurrection of "dead" devices (Eric Wong ) + +2014-12-15: Release version 2.72 + + * Work with DBD::SQLite's latest lock errors (dormando ) + + * remove update_host_property (Eric Wong ) + + * remove users of unreachable_fids table (Eric Wong ) + + * monitor: batch MySQL device table updates (Eric Wong ) + + * monitor: defer DB updates until all HTTP requests are done (Eric Wong ) + + * connection/poolable: defer expiry of timed out connections (Eric Wong ) + + * connection/poolable: disable watch_write before retrying write (Eric Wong ) + + * connection/poolable: do not write before event_write (Eric Wong ) + + * add conn_pool_size configuration option (Eric Wong ) + + * enable TCP 
keepalives for iostat watcher sockets (Eric Wong ) + + * host: add "readonly" state to override device "alive" state (Eric Wong ) + + * add LICENSE file to distro (dormando ) + +2013-08-18: Release version 2.70 + + * This release features a very large rewrite to the Monitor worker to run + checks in parallel. There are no DB schema changes. + + * replicate: use persistent connection from pool if possible (Eric Wong ) + + * replicate: enforce expected Content-Length in http_copy (Eric Wong ) + + * create_open: parallelize directory vivification (Eric Wong ) + + * device: reuse HTTP connections for MKCOL (Eric Wong ) + + * delete worker uses persistent HTTP connections (Eric Wong ) + + * httpfile: use HTTP connection pool for DELETE (Eric Wong ) + + * httpfile: use Net::HTTP::NB, remove LWP::UserAgent (Eric Wong ) + + * fsck: parallelize size checks for any given FID (Eric Wong ) + + * monitor: refactor/rewrite to use new async API (Eric Wong ) + +2013-08-07: Release version 2.68 + + * optimize monitor worker for large installs (Eric Wong ) + + * allow startup without job_master (and dependent workers) (Eric Wong ) + + * store: do not disconnect for max_handles while locked (Eric Wong ) + + * store: do not auto-reconnect while holding a lock (Eric Wong ) + + * add naive MultipleDevice replication policy (Eric Wong ) + + * httpfile: log mogstored I/O errors when checksumming (Eric Wong ) + + * ProcManager: log socketpair errors correctly (Eric Wong ) + + * fix "drain" handling used by MultipleHosts replpolicy (Eric Wong ) + + * httpfile: correct timeouts for sidechannel digest (Eric Wong ) + + * httpfile: correct FILE_MISSING check in digest_mgmt (Eric Wong ) + + * mogstored: avoid bareword on IO::AIO w/o fadvise (Eric Wong ) + + * ProcManager: only log times_out_of_qworkers for new queries (Eric Wong ) + + * Don't emit warnings if we're lacking the space free of a device. If we don't find space on devices with known space free, try the unknowns. 
(Dave Lambley ) + + * list_keys: escape in Store, allow [%\\] as prefix (Eric Wong ) + + * list_keys: consistent ESCAPE usage across DB types (Eric Wong ) + +2013-02-02: Release version 2.67 + + * Serialize tempfile reaping (dormando ) + + * reaper: ensure worker can be stopped via "!want" (Eric Wong ) + + * domain removal also removes its default class (Eric Wong ) + + * store: wrap create_class in a transaction to avoid races (Eric Wong ) + + * mogstored: fix kqueue usage with daemonization (Eric Wong ) + + * Filter the devices before we do an expensive sort. (Dave Lambley ) + + * httpfile: avoid killing worker on down sidechannel (Eric Wong ) + + * move checksum and tempfile delete to delete worker (Eric Wong ) + + * sqlite: use immediate transactions to prevent busy errors (Eric Wong ) + + * disable Nagle's algorithm for accepted clients (Eric Wong ) + + * ProcManager: favor using recently-used queryworkers (Eric Wong ) + + * Do both sorts in one method, to save on shared initialisation. (Dave Lambley ) + + * Pull out device sorting into it's own method for overriding. (Dave Lambley ) + + * Reseed the random number generator after forking. (Dave Lambley ) + + * support nginx server type in mogstored command line options (Daniel Frett ) + (also Gernot Vormayr , others) + +2013-01-06: Release version 2.66 + + * add a hook to cmd_updateclass (Daniel Frett ) + + * support updating the class to the default class which has an id of 0 (Daniel Frett ) + + * reaper: validate DB connection before reaping (Eric Wong ) + Fixes occasional crash in reaper process. + + * improve handling of classes which change hash algorithm (Eric Wong ) + + * fsck: skip non-existent FIDs properly (Eric Wong ) + + * fsck: use replicate lock when fixing FID (Eric Wong ) + + * query: allow "0" key on all commands which take keys (Eric Wong ) + + * prevent reqid mismatches (and queryworker death) (Eric Wong ) + Fixes crash case with specific error types. 
+ + * fix use_dest_devs for rebalance (Pyry Hakulinen ) + Fixes "use_dest_devs" argument during rebalance. + + * Fix "skip_devcount" during rebalance (Pyry Hakulinen ) + Now actually skips updating devcount column during rebalance. + +2012-08-13: Release version 2.65 + + * Postgres advisory lock instead of table-based lock (Robin H. Johnson ) + Now requires minimum Postgres version of pg8.4. + + * reaper: switch to Danga::Socket for scheduling (Eric Wong ) + + * reaper: add queue_size_for_reaper server setting (Eric Wong ) + + * reaper: add "queue_rate_for_reaper" server setting (Eric Wong ) + defaults to 1000, same as previously. + + * reaper: global lock around DB interaction (Eric Wong ) + prevents reapers clobbering each other, causing a reduction in DB writes. + + * tests: add basic test for reaper (Eric Wong ) + + * fix tests when /etc/mogilefs/mogstored.conf exists (Eric Wong ) + + * iostat: increase flexibility of iostat parser (Eric Wong ) + + * iostat: allow MOG_IOSTAT_CMD env override (Eric Wong ) + + * When a mogstored is down, die with a more informative message. (Dave Lambley ) + +2012-06-21: Release version 2.64 + + * Delete memcache data when we replicate fids (Pyry Hakulinen ) + + * implement "case_sensitive_list_keys" server setting (Eric Wong ) + + * get_paths: deprioritize devs in "drain" state (Eric Wong ) + + * make marking a host down cause devices to act as down (Eric Wong ) + + * monitor skips hosts marked dead or down (Eric Wong ) + +2012-05-29: Release version 2.63 + + * Critical bugfix for Postgres users introduced by 2.61. New file uploads + would fail. (noticed by robin H. Johnson, fixed by Eric Wong) + +2012-05-19: Release version 2.62 + + * Critical bugfix for a compilation error (dormando, reported by Robin) + + * Fix for upgrading a Postgres install for checksums (Robin * H. 
Johnson ) + +2012-05-18: Release version 2.61 + + * fix issue #57 by Pyry and Eric (dormando ) + (mogadm host status sometimes allowed typos) + + * avoid unnecessary devcount update in create_close (Eric Wong ) + + * size arg check from tomas doran (dormando ) + (pass in a "size" argument (in bytes) to create_open and mogilefs will + ensure there's enough free space for it) + + * sqlite: implement locking via tables (Eric Wong ) + + * worker/query: Add optional callid parameter (Gernot Vormayr ) + (allows command pipelining) + + * delete: prevent orphan files from replicator race (Eric Wong ) + + * fsck: prevent running over 100% completion (Eric Wong ) + + * fsck: cleanup and reduce unnecessary devcount updates (Eric Wong ) + + * fsck: update devcount, forget devs on unfixable FIDs (Eric Wong ) + + * fsck: log bad count correctly instead of policy violation (Eric Wong ) + + * tests: add tests for fsck functionality (Eric Wong ) + + * monitor: only broadcast reject_bad_md5 on change (Eric Wong ) + + * worker: delete_domain returns has_classes error (Eric Wong ) + + * log: enable autoflush for stdout logging (Eric Wong ) + +2012-03-30: Release version 2.60 + + * Fix fsck status when running for the first time (dormando ) + + * Checksum support (Major update!) (Eric Wong ) + See doc/checksums.txt for an overview of how the new checksum system + works. Also keep an eye on the wiki (http://www.mogilefs.org) for more + complete documentation in the coming weeks. 
+ +2012-02-29: Release version 2.59 + + * don't make SQLite error out on lock calls (dormando ) + + * Postgres wasn't honoring a no-wait timeout (dormando ) + +2012-02-28: Release version 2.58 + + * reduce UPDATE's to device table (dormando ) + + * replicate: retry on short writes to destination (Eric Wong ) + + * enable TCP keepalives for accepted sockets (Eric Wong ) + + * Populate memcache even when noverify is missing (Pyry Hakulinen ) + + * Remove FID from memcache when it is deleted (Pyry Hakulinen ) + + * Unset memc when memcache servers have been removed (Pyry Hakulinen ) + + * Filter out illegal memcache servers from memcache_servers (Pyry Hakulinen ) + + * Add skip_mkcol setting for servers which do not need it (Pyry Hakulinen ) + + * skip_devcount variable that disables devcount updates (Pyry Hakulinen ) + + * Back off if master DB is down (dormando ) + + * use cached setting for slave list (dormando ) + + * fix "fsck never stops" issue (dormando ) + +2012-01-31: Release version 2.57 + + * Silence a warning about undefined value introduced by the slave database ordering work (hachi@kuiki.net) + + * Need a hook to be able to check the slave list during startup (hachi@kuiki.net) + +2012-01-27: Release version 2.56 + + * Add database connect timeouts (dormando ) + + * fix potential hang in JobMaster added in 2.55 (dormando ) + + * fix "set_weight" command (Eric Wong ) + + * sqlite: consider SQLITE_BUSY and SQLITE_LOCKED deadlocks (Eric Wong ) + + * kill a little more dead fsck/Checker code (dormando ) + + * Destroy mogdeps (dormando ) + + * improve slave handling code (dormando ) + + * don't spew errors if the master goes down (dormando ) + + * provide an async server setting cache via monitor worker (dormando ) + + * fix return value of commands that clear_cache (Eric Wong ) + + * make reaper wait less time in test mode (dormando ) + + * queryworker: reimplement "clear_caches" command (Eric Wong ) + + * worker: use timeout with read_from_parent() instead 
of sleep (Eric Wong ) + + * Fixed issue where too many devices being active could cause the monitor job + to be timed out by the watchdog. (Tomas Doran) + +2011-11-14: Release version 2.55 + + * fixed sources in replication are now a suggestion (dormando ) + + * More stats to help troubleshoot rebalance (dormando ) + + * return 1 from condthrow if all is well (dormando ) + + * schedule fsck replications for the future. (dormando ) + + * stop enforcing a source id on new file upload (dormando ) + + * restore new file upload replication priority (dormando ) + + * Prevent queue related MySQL query hangs (dormando ) + +2011-10-29: Release version 2.54 + + * Issue 36: Further insulate from mysql restarts resetting the fid counter (dormando ) + + * Allow default class to be overridden again. (dormando ) + + * Fix crash in old delete worker code (Daniel Frett ) + + * Fix mb_free() counting empty device as missing (git.user ) + + * sqlite: fix mogdbsetup failure on new setups (Eric Wong ) + + * fid size workaround for lighttpd 1.4.x (dormando ) + + * Fix various issues in the new mogilefs server query worker. (Tomas Doran ) + + * Allow chunked PUT's of unlimited size (dormando ) + + * Fix listing FIDs. + * Fix fsck fixing a file from logging a message which appears to be + two commands (due to spurious \n). Also fix debug function to + not be able to log messages like this. 
+ +2011-08-12: Release version 2.53 + + * Fix host status not updating for trackers (important) + +2011-07-30: Release version 2.52 + + * Plugins (such as FilePaths) might want to be run in info/debug commands (Michael Baehr ) + + * schema version 14; support devices > 16TB (dormando ) + + * Make FSCK worker handle dead devices (dormando ) + + * Throw out old HTTPFile->size code and use LWP (dormando ) + + * Remove forced loading of DBD::mysql (dormando ) + + * Don't test write on readonly devices (Akzhan Abdulin ) + +2011-06-27: Release version 2.51 + + * Add minor backcompat for ZoneLocal and friends (dormando ) + + * Only monitor a device if we should monitor it. (dormando ) + +2011-06-15: Release version 2.50 + + * Completely rewire meta object caches. Device/Host/Class/Domain caches are + now asynchronously loaded via the Monitor worker. This should drastically + reduce the number of small queries issued against the master database. + (dormando ) + + * Don't run workers until Monitor runs once (dormando ) + + * s/TYPE/ENGINE/ for innodb initialization (dormando ) + + * Create mogile database explicitly with UTF-8 encoding. (Robin H. Johnson ) + + * Actually use the MOGTEST_* variables for Postgres testing... (Robin H. Johnson ) + + * Add support for config file includes for Mogstored under lighttpd (Jason Mills ) + + * Make postgres use new delete worker code (Robin H. Johnson ) + + * New test env variables: MOGTEST_DBROOT{USER,PASS} for easier test usage. (Robin H. Johnson ) + + * Document test variables. (Robin H. Johnson ) + + * Support inclusion of custom lighttpd config. (Jason Mills ) + +2011-01-13: Release version 2.46 + + * Fix job_master crash under postgres (Robin H. Johnson ) + + * Revert "only dbh->ping if not used in over a minute" (dormando ) + *IMPORTANT FIX* if you're using 2.45, you should upgrade to this. + + * Example application. 
(dormando ) + + * missing $fidid for source_down error message (dormando ) + +2011-01-08: Release version 2.45 + + * file_debug command (dormando ) + + * add "file_info" command for fetching metadata. (dormando ) + + * don't allow clients to upload to the wrong dev (dormando ) + + * validate database handles in the right places (dormando ) + + * only dbh->ping if not used in over a minute (dormando ) + + * make list_fids work with gaps (dormando ) + + * Postgres enqueue_fids_* cleanup (Robin H. Johnson ) + + * Fixes MogileFS::Store::retry_on_deadlock to honor non-deadlock exceptions (Jason Mills ) + + * Implements tests for MogileFS::Store::retry_on_deadlock (Jason Mills ) + + * Implements tests for MogileFS::Store::ignore_replace (Jason Mills ) + + * job_master was never revalidating its dbh handle. (dormando ) + + * add a couple useful stats (dormando ) + + * Correct iostat command on darwin (Tomas Doran ) + + * Fix test db creation with SQLite (Tomas Doran ) + + * Fix SQLite so tracker at least starts (Tomas Doran ) + + * additional debian packaging fixes (kad ) + + * debian packaging updates. (Andre Bohr ) + + * Documentation updates. (Pyry Hakulinen ) + + * HTTPFile returns -1 if the file is missing from the device. Content-Length: 0 is valid and it should return BLEN not MISS. (Pyry Hakulinen ) + + * some further editing of the info file. (dormando ) + + * Changed repository metadata (Tomas Doran ) + +2010-11-25: Release version 2.44 + + * Fix issue with rebalance not filtering source devs from the + destination list. + + * Make global rebalance limits work (Martijn Lina) + +2010-10-10: Release version 2.43 + + * Make over-replication fixable again. (sigh) + + * Fix old bug in MultipleHosts() which would not let you drop from + mindev > 1 down to 1. 
+ +2010-10-08: Release version 2.42 + + * Make FSCK run again (Andre Pascha) + + * Fix Postgres schema upgrade issue (hopefully) + +2010-10-06: Release version 2.41 + + * One line change that to fix uprading to 2.40 + +2010-09-28: Release version 2.40 + + * Optimize worker sleep patterns and worker/parent communications. + * New rebalance/drain code. See the docs/wiki for how to use. ***NOTE*** Old drain/rebalance code is disabled. Setting a device into drain mode no longer does what you think it would. + http://code.google.com/p/mogilefs/wiki/Rebalance 2010-08-13: Release version 2.37 diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..86ae4f48 --- /dev/null +++ b/LICENSE @@ -0,0 +1 @@ +License granted to use/distribute under the same terms as Perl itself. diff --git a/MANIFEST b/MANIFEST index b06fb781..28a075be 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,56 +1,23 @@ CHANGES +LICENSE +examples/testapp/testapp-perlbal.conf +examples/testapp/testapp.psgi +examples/testapp/README doc/fsck-notes.txt doc/memcache-support.txt doc/pluggable-replication-policies.txt doc/testing.txt -lib/mogdeps/Danga/Socket.pm -lib/mogdeps/Net/Netmask.pm -lib/mogdeps/Perlbal.pm -lib/mogdeps/Perlbal/AIO.pm -lib/mogdeps/Perlbal/BackendHTTP.pm -lib/mogdeps/Perlbal/Cache.pm -lib/mogdeps/Perlbal/ChunkedUploadState.pm -lib/mogdeps/Perlbal/ClientHTTP.pm -lib/mogdeps/Perlbal/ClientHTTPBase.pm -lib/mogdeps/Perlbal/ClientManage.pm -lib/mogdeps/Perlbal/ClientProxy.pm -lib/mogdeps/Perlbal/CommandContext.pm -lib/mogdeps/Perlbal/HTTPHeaders.pm -lib/mogdeps/Perlbal/ManageCommand.pm -lib/mogdeps/Perlbal/Plugin/AccessControl.pm -lib/mogdeps/Perlbal/Plugin/AutoRemoveLeadingDir.pm -lib/mogdeps/Perlbal/Plugin/Highpri.pm -lib/mogdeps/Perlbal/Plugin/NotModified.pm -lib/mogdeps/Perlbal/Plugin/Queues.pm -lib/mogdeps/Perlbal/Plugin/Stats.pm -lib/mogdeps/Perlbal/Plugin/Vhosts.pm -lib/mogdeps/Perlbal/Plugin/AtomInject.pm -lib/mogdeps/Perlbal/Plugin/AtomStream.pm 
-lib/mogdeps/Perlbal/Plugin/Cgilike.pm -lib/mogdeps/Perlbal/Plugin/EchoService.pm -lib/mogdeps/Perlbal/Plugin/Include.pm -lib/mogdeps/Perlbal/Plugin/LazyCDN.pm -lib/mogdeps/Perlbal/Plugin/MaxContentLength.pm -lib/mogdeps/Perlbal/Plugin/Palimg.pm -lib/mogdeps/Perlbal/Plugin/Redirect.pm -lib/mogdeps/Perlbal/Plugin/Vpaths.pm -lib/mogdeps/Perlbal/Pool.pm -lib/mogdeps/Perlbal/ReproxyManager.pm -lib/mogdeps/Perlbal/Service.pm -lib/mogdeps/Perlbal/Socket.pm -lib/mogdeps/Perlbal/TCPListener.pm -lib/mogdeps/Perlbal/Test.pm -lib/mogdeps/Perlbal/Test/WebClient.pm -lib/mogdeps/Perlbal/Test/WebServer.pm -lib/mogdeps/Perlbal/UploadListener.pm -lib/mogdeps/Perlbal/Util.pm -lib/mogdeps/Perlbal/SocketSSL.pm -lib/mogdeps/Sys/Syscall.pm +doc/checksums.txt +lib/MogileFS/Checksum.pm lib/MogileFS/Class.pm lib/MogileFS/Config.pm lib/MogileFS/Connection/Client.pm +lib/MogileFS/Connection/HTTP.pm lib/MogileFS/Connection/Mogstored.pm +lib/MogileFS/Connection/Parent.pm +lib/MogileFS/Connection/Poolable.pm lib/MogileFS/Connection/Worker.pm +lib/MogileFS/ConnectionPool.pm lib/MogileFS/DevFID.pm lib/MogileFS/Device.pm lib/MogileFS/DeviceState.pm @@ -62,7 +29,9 @@ lib/MogileFS/HTTPFile.pm lib/MogileFS/IOStatWatcher.pm lib/MogileFS/Overview.pm lib/MogileFS/ProcManager.pm +lib/MogileFS/Rebalance.pm lib/MogileFS/ReplicationPolicy.pm +lib/MogileFS/ReplicationPolicy/MultipleDevices.pm lib/MogileFS/ReplicationPolicy/MultipleHosts.pm lib/MogileFS/ReplicationPolicy/Union.pm lib/MogileFS/ReplicationRequest.pm @@ -71,9 +40,13 @@ lib/MogileFS/Store.pm lib/MogileFS/Store/MySQL.pm lib/MogileFS/Store/Postgres.pm lib/MogileFS/Store/SQLite.pm -lib/MogileFS/Sys.pm lib/MogileFS/Test.pm lib/MogileFS/Util.pm +lib/MogileFS/Factory.pm +lib/MogileFS/Factory/Domain.pm +lib/MogileFS/Factory/Class.pm +lib/MogileFS/Factory/Host.pm +lib/MogileFS/Factory/Device.pm lib/MogileFS/Worker.pm lib/MogileFS/Worker/Delete.pm lib/MogileFS/Worker/Fsck.pm @@ -88,11 +61,13 @@ lib/Mogstored/ChildProcess/IOStat.pm 
lib/Mogstored/FIDStatter.pm lib/Mogstored/HTTPServer.pm lib/Mogstored/HTTPServer/Apache.pm +lib/Mogstored/HTTPServer/Nginx.pm lib/Mogstored/HTTPServer/Lighttpd.pm lib/Mogstored/HTTPServer/Perlbal.pm lib/Mogstored/HTTPServer/None.pm lib/Mogstored/SideChannelClient.pm lib/Mogstored/SideChannelListener.pm +lib/Mogstored/TaskQueue.pm Makefile.PL MANIFEST mogautomount @@ -100,15 +75,26 @@ mogdbsetup mogilefsd mogstored t/00-startup.t +t/01-domain-class.t +t/02-host-device.t t/10-weighting.t t/20-filepaths.t -t/domains-classes.t +t/30-rebalance.t +t/40-httpfile.t +t/50-checksum.t +t/51-checksum_class_change.t +t/60-fsck.t +t/70-reaper.t +t/80-job_master.t +t/checksum.t t/fid-stat.t -t/hosts-devices.t +t/http.t t/mogstored-shutdown.t +t/multiple-devices-replpol.t t/multiple-hosts-replpol.t t/replpolicy-parsing.t t/replpolicy.t +t/store-sqlite.t t/store.t t/util.t TODO diff --git a/MANIFEST.SKIP b/MANIFEST.SKIP index 51132237..602bcd62 100644 --- a/MANIFEST.SKIP +++ b/MANIFEST.SKIP @@ -14,6 +14,7 @@ _blib$ \bdebian\b \bconf\b \.svn +\.git dev-killmogstored.sh make-par.sh mogstored.pp diff --git a/Makefile.PL b/Makefile.PL index 6dcbb9e5..f136b115 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -16,14 +16,14 @@ $exefiles = ["mogstored"] if exists $ENV{DANGABUILD_MOGSTOREDONLY}; $exefiles = ["mogilefsd"] if exists $ENV{DANGABUILD_MOGILEFSDONLY}; WriteMakefile( - NAME => 'mogilefs-server', + NAME => 'MogileFS::Server', VERSION_FROM => 'lib/MogileFS/Server.pm', AUTHOR => 'Brad Fitzpatrick ', ABSTRACT_FROM => 'lib/MogileFS/Server.pm', EXE_FILES => $exefiles, PREREQ_PM => { 'Danga::Socket' => '1.56', - 'Perlbal' => '1.76', + 'Perlbal' => '1.79', 'Sys::Syslog' => 0, 'Sys::Syscall' => '0.22', 'Getopt::Long' => 0, @@ -33,17 +33,17 @@ WriteMakefile( 'IO::AIO' => 0, 'MogileFS::Client' => 0, DBI => 0, + 'Test::More' => 0.88, # 0.88 for done_testing() support }, META_MERGE => { no_index => { - directory => 'lib/mogdeps', package => ['ProcessHandle', 'TrackerHandle', 'MogstoredHandle', 
'MogPath', 'Mgd'], }, resources => { - homepage => 'http://danga.com/mogilefs/', - bugtracker => 'http://rt.cpan.org/Public/Dist/Display.html?Name=mogilefs-server', - repository => 'http://code.sixapart.com/svn/mogilefs/', + homepage => 'http://www.mogilefs.org', + bugtracker => 'http://code.google.com/p/mogilefs/issues/list', + repository => 'git://github.com/mogilefs/MogileFS-Server.git', MailingList => 'http://groups.google.com/group/mogile', }, diff --git a/MogileFS-Server.spec b/MogileFS-Server.spec index 50147b16..142e1b7a 100644 --- a/MogileFS-Server.spec +++ b/MogileFS-Server.spec @@ -2,7 +2,7 @@ name: MogileFS-Server summary: MogileFS-Server - MogileFS Server daemons and utilities. -version: 2.36 +version: 2.46 release: 2%{?dist} vendor: Alan Kasindorf packager: Jonathan Steinert @@ -10,7 +10,7 @@ license: Artistic group: Applications/CPAN buildroot: %{_tmppath}/%{name}-%{version}-%(id -u -n) buildarch: noarch -source: mogilefs-server-%{version}.tar.gz +source: MogileFS-Server-%{version}.tar.gz autoreq: no requires: MogileFS-Server-mogilefsd = %{version}-%{release} requires: MogileFS-Server-mogstored = %{version}-%{release} @@ -26,10 +26,10 @@ This is a dummy package which depends on all the others so you can install them %prep rm -rf "%{buildroot}" -%setup -n mogilefs-server-%{version} +%setup -n MogileFS-Server-%{version} %build -%{__perl} Makefile.PL INSTALLDIRS="vendor" PREFIX=%{buildroot}%{_prefix} +%{__perl} Makefile.PL INSTALLDIRS="vendor" PREFIX=%{buildroot}%{_prefix} INSTALL_BASE= make all make test @@ -38,10 +38,6 @@ make pure_install [ -x /usr/lib/rpm/brp-compress ] && /usr/lib/rpm/brp-compress -# remove mogdeps and related files -rm -rf %{buildroot}/%{perl_vendorlib}/mogdeps -rm -f %{buildroot}/usr/share/man/man3/mogdeps::* - # remove special files find %{buildroot} \( \ -name "perllocal.pod" \ diff --git a/TESTING b/TESTING index 28dbd4d7..fa73d2c6 100644 --- a/TESTING +++ b/TESTING @@ -10,12 +10,13 @@ Testing under Postgresql: 
------------------------- Database setup: # createuser -SRlD mogile -# createdb -E UTF8 -O mogile tmp_mogiletest +# createdb --template template0 -E UTF8 -O mogile tmp_mogiletest To run the tests: # make test \ MOGTEST_DBUSER=mogile \ MOGTEST_DBNAME=tmp_mogiletest \ + MOGTEST_DBROOTUSER=postgres \ MOGTEST_DBTYPE=Postgres Testing under SQLite: @@ -24,3 +25,22 @@ Testing under SQLite: MOGTEST_DBUSER=mogile \ MOGTEST_DBNAME=tmp_mogiletest \ MOGTEST_DBTYPE=SQLite + +Available test variables: +------------------------- +MOGTEST_DBNAME +MOGTEST_DBTYPE +MOGTEST_DBUSER +MOGTEST_DBPASS +MOGTEST_DBROOTUSER +MOGTEST_DBROOTPASS + +Other use test notes: +--------------------- +- Running the tests should be done with equally new versions of MogileFS-Utils + and perl-MogileFS-Client. If you have all 3 git repos checked out in + parallel, set: + "PATH=../MogileFS-Utils/:$PATH PERL5LIB=../perl-MogileFS-Client/lib" +- You can control the disk space used for the test mogstored's by setting + TMPDIR. + diff --git a/conf/mogilefsd.conf b/conf/mogilefsd.conf index c46a3f02..ad3ebb92 100644 --- a/conf/mogilefsd.conf +++ b/conf/mogilefsd.conf @@ -1,4 +1,7 @@ -#daemonize = 1 +# Enable daemon mode to work in background and use syslog +daemonize = 1 +# Where to store the pid of the daemon (must be the same in the init script) +pidfile = /var/run/mogilefsd/mogilefsd.pid # Database connection information db_dsn = DBI:mysql:mogilefs:host=127.0.0.1 db_user = username diff --git a/debian/changelog b/debian/changelog index 856f8813..818c074e 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,65 @@ +mogilefs-server (2.44-1) stable; urgency=low + + * pull from upstream + + -- Dormando Thu, 23 Dec 2010 21:58:49 -0800 + +mogilefs-server (2.43-1) stable; urgency=low + + * pulled 2.43 from github which includes fixes from 2.42-1 + * added pidfile entry to default mogilefsd.conf to fix pidfile creation bug + * added daemon = 1 to default mogilefsd.conf to make mogilefsd use syslog + + -- 
Andre Pascha Mon, 11 Oct 2010 10:21:27 +0200 + +mogilefs-server (2.42-1kwick2) stable; urgency=low + + * added fixes for over replicated files of todays github pull + * include mindevcount=1 fixes + + -- Andre Pascha Sun, 10 Oct 2010 14:34:25 +0200 + +mogilefs-server (2.42-1kwick1) stable; urgency=low + + * git pull of today + * includes fix from Andre Pascha to make FSCK work again + + -- Andre Pascha Sat, 09 Oct 2010 18:12:18 +0200 + +mogilefs-server (2.41-1kwick3) stable; urgency=low + + * fixed problem with wrong permissions on mogilefsd pid file + + -- Andre Pascha Fri, 08 Oct 2010 17:43:44 +0200 + +mogilefs-server (2.41-1kwick2) stable; urgency=low + + * added pid directory to mogstored/mogilefsd. fixes problem with stopping + the daemon via init skript when daemon is set to 1 in the config. Daemon + needs to be set to 1 or syslogging won't be enabled. + + -- Andre Pascha Fri, 08 Oct 2010 17:33:11 +0200 + +mogilefs-server (2.41-1kwick1) stable; urgency=low + + * moved to github version of MogileFS-Server + * package versioning now coresponds to mogilefs version + + -- Andre Pascha Fri, 08 Oct 2010 16:59:33 +0200 + +mogilefs-server (1.00-2kwick2) stable; urgency=low + + * added dependency to Danga::Socket for mogostored + + -- Andre Pascha Fri, 08 Oct 2010 15:29:46 +0200 + +mogilefs-server (1.00-2kwick1) stable; urgency=low + + * Non-maintainer upload. 
+ * SVN snapshot of 06 Oct 2010 + + -- Andre Pascha Thu, 07 Oct 2010 17:26:01 +0200 + mogilefs-server (1.00-2) unstable; urgency=low * fix data loss bug when the 'tempfile' table is InnoDB and the diff --git a/debian/control b/debian/control index 7524611c..f0381d45 100644 --- a/debian/control +++ b/debian/control @@ -2,12 +2,12 @@ Source: mogilefs-server Section: perl Priority: optional Maintainer: Jonathan Steinert -Build-Depends-Indep: libstring-crc32-perl +Build-Depends-Indep: debhelper (>= 4.1.40), perl (>= 5.6.0-16), libstring-crc32-perl Standards-Version: 3.6.1.0 Package: mogstored Architecture: all -Depends: ${perl:Depends}, debhelper (>= 4.1.40), libperlbal-perl, libio-aio-perl, debconf (>= 1.2.0) +Depends: ${perl:Depends}, libperlbal-perl (>= 1.79), libio-aio-perl, debconf (>= 1.2.0), sysstat Suggests: mogilefs-utils Description: storage node daemon for MogileFS Mogstored is a storage node daemon for MogileFS, the open-source @@ -15,7 +15,7 @@ Description: storage node daemon for MogileFS Package: mogilefsd Architecture: all -Depends: ${perl:Depends}, debhelper (>= 4.1.40), libdbd-mysql-perl, libdbi-perl, debconf (>= 1.2.0), libnet-netmask-perl, libwww-perl +Depends: ${perl:Depends}, libdbd-mysql-perl, libdbi-perl, debconf (>= 1.2.0), libnet-netmask-perl, libwww-perl, libdanga-socket-perl Suggests: mogilefs-utils Description: scalable distributed filesystem from Danga Interactive MogileFS is an open-source, application-level distributed filesystem. It diff --git a/debian/mogilefsd.init b/debian/mogilefsd.init index 75c13559..b17a70e0 100755 --- a/debian/mogilefsd.init +++ b/debian/mogilefsd.init @@ -15,7 +15,8 @@ DAEMON=/usr/bin/mogilefsd NAME=mogilefsd DESC=mogilefsd DEFAULTS=/etc/default/$NAME -PIDFILE=/var/run/$NAME.pid +PIDDIR=/var/run/$NAME +PIDFILE=$PIDDIR/$NAME.pid SCRIPTNAME=/etc/init.d/$NAME # Exit if the package is not installed @@ -47,6 +48,13 @@ set -e # do_start() { + + if ! 
test -d ${PIDDIR} + then + mkdir ${PIDDIR} + chown ${MOGILEFSD_RUNASUSER} ${PIDDIR} + fi + if [ -e $PIDFILE ] then @@ -61,7 +69,7 @@ do_start() fi - start-stop-daemon --start --quiet --exec $DAEMON --pidfile $PIDFILE -b -m --name $NAME --chuid $MOGILEFSD_RUNASUSER + start-stop-daemon --start --quiet --exec $DAEMON -b --name $NAME --chuid $MOGILEFSD_RUNASUSER -- $MOGILEFSD_EXTRA_OPTS } # diff --git a/debian/mogilefsd.install b/debian/mogilefsd.install index 0cd0acf4..d06e00a5 100644 --- a/debian/mogilefsd.install +++ b/debian/mogilefsd.install @@ -3,5 +3,3 @@ usr/bin/mogilefsd /usr/bin usr/share/man/man1/mogilefsd.1p /usr/share/man/man1 usr/share/man/man3/MogileFS* /usr/share/man/man3 usr/share/perl5/MogileFS /usr/share/perl5 -usr/share/perl5/dev-mogstored.pl /usr/share/mogilefsd -usr/share/perl5/makedocs.pl /usr/share/mogilefsd diff --git a/debian/mogilefsd.postrm b/debian/mogilefsd.postrm index 33aa8910..ce101287 100644 --- a/debian/mogilefsd.postrm +++ b/debian/mogilefsd.postrm @@ -11,5 +11,5 @@ if [ "$1" = "purge" ] rmdir --ignore-fail-on-non-empty /etc/default fi -rm -f /var/run/mogilefsd.pid +rm -fr /var/run/mogilefsd diff --git a/debian/mogstored.init b/debian/mogstored.init index 682a125e..06c5ec76 100755 --- a/debian/mogstored.init +++ b/debian/mogstored.init @@ -15,7 +15,8 @@ DAEMON=/usr/bin/mogstored NAME=mogstored DESC=mogstored DEFAULTS=/etc/default/$NAME -PIDFILE=/var/run/$NAME.pid +PIDDIR=/var/run/$NAME +PIDFILE=$PIDDIR/$NAME.pid SCRIPTNAME=/etc/init.d/$NAME # Exit if the package is not installed @@ -47,6 +48,12 @@ set -e # do_start() { + if ! 
test -d ${PIDDIR} + then + mkdir ${PIDDIR} + chown ${MOGSTORED_RUNASUSER} ${PIDDIR} + fi + if [ -e $PIDFILE ] then diff --git a/debian/mogstored.postrm b/debian/mogstored.postrm index 4c9069ca..bb836fc6 100644 --- a/debian/mogstored.postrm +++ b/debian/mogstored.postrm @@ -19,6 +19,6 @@ fi #DEBHELPER# - rm -f /var/run/mogstored.pid + rm -fr /var/run/mogstored diff --git a/debian/rules b/debian/rules index 24232808..41c724e3 100755 --- a/debian/rules +++ b/debian/rules @@ -44,10 +44,6 @@ install-stamp: find $(TMP) -name .packlist -exec rm '{}' \; find $(TMP) -depth -type d -empty -exec rmdir '{}' \; - # destroy all the mogdeps stuff - rm -rf $(TMP)/usr/share/perl5/mogdeps - rm -rf $(TMP)/usr/share/man/man3/mogdeps::* - dh_install --sourcedir=$(TMP) --fail-missing touch install-stamp diff --git a/doc/checksums.txt b/doc/checksums.txt new file mode 100644 index 00000000..47be6ed7 --- /dev/null +++ b/doc/checksums.txt @@ -0,0 +1,101 @@ +database +-------- + +checksum hashes are represented as binary (VARBINARY for MySQL/SQLite, +bytea for Postgres) columns in the database. They can be up to 64 bytes +large for SHA512 or WHIRLPOOL. + +tracker protocol +---------------- + +Hex is used instead of binary over the wire, hex is already immune to +URL encoding in the tracker protocol. + +Hashes are represented with in the $HASHTYPE:$HEXDIGEST format: + + MD5:68b329da9893e34099c7d8ad5cb9c940 + +verifying checksums (on disk) +----------------------------- + +Ideally, mogstored checksum calculation is done by mogstored and only +the checksum (in $HASHTYPE=$HEXDIGEST format) is sent over the wire. + +If mogstored is not available, the checksum is calculated on the tracker +by streaming the file with HTTP GET. 
+ +create_close (query worker) +--------------------------- + +New optional parameters: + +- checksumverify=(0|1) default: 0 (false) +- checksum=$HASHTYPE:$HEXDIGEST + +If "checksumverify" is "1" and "checksum" is present, "create_close" +will not return until it has verified the checksum. + +If the storage class of a file specifies a valid "hashtype", +the checksum is saved to the "checksum" table in the database. + +The client is _never_ required to supply a checksum. + +The client may always supply a checksum (including checksumverify) +even if the storage class of the file does not require one. + +monitor +------- + +Will also attempt to check if the storage server can reject PUTs +with mismatched Content-MD5 headers. Replication will use this +info to avoid calling HTTPFile->digest post-replication to verify +the upload completed successfully. + +replication +----------- + +The replication worker can calculate the checksum of the file while it +streams the source file (via GET) to the destination via PUT. + +If the client did not supply a checksum for create_close but the file +storage class requires a checksum, the replication worker will save the +calculated checksum after initial replication. + +If the checksum row for a file already exists before replication, +replication will verify the checksum it got (via GET) against the +checksum row. Replication fails if checksum verification fails +against the database row. + +Replication will also put a "Content-MD5" header in the PUT request if +using MD5 and the checksum is known before replication starts. The +storage server may (but is not required to) verify this and return an +HTTP error response if verification fails. The monitor change will +allow us to know if a destination device is capable of rejecting +a request based on a Content-MD5 mismatch. 
+ +If the file storage class requires a checksum: + + If the destination device can NOT reject based on (bad) MD5: + + Replication worker also verifies the checksum + on the destination after replication is complete + (either via mogstored or GET). + +fsck +---- + +If checksum row exists: + verifies all copies match + same behavior as size mismatches in existing code: + if good copies exist + delete bad ones and rereplicate + else if only bad copies exist + log error + +If checksum row is missing but class requires checksum: + if all copies of the file have the same checksum: + create the checksum row + if any device containing a copy down: + wait and revisit this FID later + if any of the copies differ: + log failure and all (hex) checksums + devids diff --git a/examples/testapp/README b/examples/testapp/README new file mode 100644 index 00000000..0a5b3162 --- /dev/null +++ b/examples/testapp/README @@ -0,0 +1,3 @@ +For a walkthrough on starting this example application, see the wiki: + +http://code.google.com/p/mogilefs/wiki/AppExample diff --git a/examples/testapp/testapp-perlbal.conf b/examples/testapp/testapp-perlbal.conf new file mode 100644 index 00000000..d585ba84 --- /dev/null +++ b/examples/testapp/testapp-perlbal.conf @@ -0,0 +1,24 @@ +SERVER max_connections = 100 +SERVER pidfile = perlbal.pid + +XS enable headers + +CREATE POOL testapp_pool + POOL testapp_pool ADD 127.0.0.1:5000 +CREATE SERVICE testapp + SET role = reverse_proxy + SET pool = testapp_pool + SET listen = 0.0.0.0:7070 + SET enable_reproxy = on + SET persist_backend = on + SET backend_persist_cache = 30 + SET connect_ahead = 1 + SET verify_backend = on + SET persist_client = off +ENABLE testapp + +CREATE SERVICE mgmt + SET role = management + SET listen = 127.0.0.1:7071 +ENABLE mgmt + diff --git a/examples/testapp/testapp.psgi b/examples/testapp/testapp.psgi new file mode 100644 index 00000000..8440228a --- /dev/null +++ b/examples/testapp/testapp.psgi @@ -0,0 +1,71 @@ +# It's just a little 
bit of perl, so hang tight if you're a php/etc user :) +use warnings; +use strict; + +# Import the MogileFS client and a helper util from Plack. +use Plack::Request; +use MogileFS::Client; + +my $TRACKERS = ['tracker1:7001']; +my $DOMAIN = 'toast'; + +# Initialize the client when the server starts. +# You could also do this in the middle of the request. +my $mogc = MogileFS::Client->new(domain => $DOMAIN, + hosts => $TRACKERS); + +sub run { + # Request object for reading paths/cookies/etc. + my $req = shift; + + # Only support GET requests for this example. + # Nothing stops us from supporting HEAD requests, though. + if ($req->method ne 'GET') { + return [ 403, [ 'Content-Type' => 'text/plain' ], + [ 'Only GET methods allowed' ] ]; + } + + # Pull out the GET /whatever path. + my $file = $req->path_info; + + # At this stage you would do some validation, or query your own + # application database for what the MogileFS path actually is. In this + # example we just ensure there is a limited set of characters used. + unless ($file =~ m/^[A-Z0-9.\/\\]+$/gmi) { + return [ 404, [ 'Content-Type' => 'text/plain' ], + [ 'Invalid request format received!' ] ]; + } + + # Ask the MogileFS tracker for the paths to this file. + # At this point you could check memcached for cached paths as well, and + # cache if none were found. + my @paths = $mogc->get_paths($file); + + # If MogileFS returns no paths, the file is likely missing or never + # existed. + unless (@paths) { + return [ 404, [ 'Content-Type' => 'text/plain' ], + [ 'File not found: ' . $file ] ]; + } + + # Now we create the magic Perlbal header, "X-REPROXY-URL". This header + # tells Perlbal to go fetch and return the file from where MogileFS has + # said it is. + # At this point you would add any other headers. If it's a jpeg, you would + # ship the proper 'image/jpeg' Content-Type. In this example we blanket + # serve everything as text/plain. 
+ my $headers = [ 'Content-Type' => 'text/plain', + 'X-REPROXY-URL' => join(' ', @paths) ]; + + # Return a 200 OK, the headers, and no body. The body will be filled in + # with what Perlbal fetches. + return [ 200, $headers, [ ] ]; +} + +# Some simple Plack glue, you can ignore this. +# For a real app you should use a full framework. ;) +my $app = sub { + my $env = shift; + my $req = Plack::Request->new($env); + return run($req); +}; diff --git a/lib/MogileFS/Checksum.pm b/lib/MogileFS/Checksum.pm new file mode 100644 index 00000000..a341304e --- /dev/null +++ b/lib/MogileFS/Checksum.pm @@ -0,0 +1,140 @@ +package MogileFS::Checksum; +use strict; +use warnings; +use overload '""' => \&as_string; + +my %TYPE = ( + "MD5" => { type => 1, bytelen => 128 / 8 }, + "SHA-1" => { type => 2, bytelen => 160 / 8 }, + +# see POD for rationale below +# "SHA-224" => { type => 3, bytelen => 224 / 8 }, +# "SHA-256" => { type => 4, bytelen => 256 / 8 }, +# "SHA-384" => { type => 5, bytelen => 384 / 8 }, +# "SHA-512" => { type => 6, bytelen => 512 / 8 }, +); + +our %NAME2TYPE = map { $_ => $TYPE{$_}->{type} } keys(%TYPE); +our %TYPE2NAME = map { $NAME2TYPE{$_} => $_ } keys(%NAME2TYPE); + +sub valid_alg { + my ($class, $alg) = @_; + + defined($alg) && defined($TYPE{$alg}); +} + +sub new { + my ($class, $row) = @_; + my $self = bless { + fidid => $row->{fid}, + checksum => $row->{checksum}, + hashtype => $row->{hashtype} + }, $class; + + return $self; +} + +# $string = "MD5:d41d8cd98f00b204e9800998ecf8427e" +sub from_string { + my ($class, $fidid, $string) = @_; + $string =~ /\A([\w-]+):([a-fA-F0-9]{32,128})\z/ or + die "invalid checksum string"; + my $hashname = $1; + my $hexdigest = $2; + my $ref = $TYPE{$hashname} or + die "invalid checksum name ($hashname) from $string"; + my $checksum = pack("H*", $hexdigest); + my $len = length($checksum); + $len == $ref->{bytelen} or + die "invalid checksum length=$len (expected $ref->{bytelen})"; + + bless { + fidid => $fidid, + checksum => 
$checksum, + hashtype => $NAME2TYPE{$hashname}, + }, $class; +} + +sub hashname { + my $self = shift; + my $type = $self->{hashtype}; + my $name = $TYPE2NAME{$type} or die "hashtype=$type unknown"; + + return $name; +} + +sub save { + my $self = shift; + my $sto = Mgd::get_store(); + + $sto->set_checksum($self->{fidid}, $self->{hashtype}, $self->{checksum}); +} + +sub maybe_save { + my ($self, $dmid, $classid) = @_; + my $class = eval { Mgd::class_factory()->get_by_id($dmid, $classid) }; + + # $class may be undef as it could've been deleted between + # create_open and create_close, we've never verified this before... + # class->{hashtype} is also undef, as we allow create_close callers + # to specify a hash regardless of class. + if ($class && defined($class->{hashtype}) && $self->{hashtype} eq $class->{hashtype}) { + $self->save; + } +} + +sub hexdigest { + my $self = shift; + + unpack("H*", $self->{checksum}); +} + +sub as_string { + my $self = shift; + my $name = $self->hashname; + my $hexdigest = $self->hexdigest; + + "Checksum[f=$self->{fidid};$name=$hexdigest]" +} + +sub info { + my $self = shift; + + $self->hashname . ':' . $self->hexdigest; +} + +1; + +__END__ + +=head1 NAME + +MogileFS::Checksum - Checksums handling for MogileFS + +=head1 ABOUT + +MogileFS supports optional MD5 checksums. Checksums can be stored +in the database on a per-class basis or they can be enabled globally +during fsck-only. + +Enabling checksums will greatly increase the time and I/O required for +fsck as all files on all devices to be checksummed need to be reread. + +Fsck and replication will use significantly more network bandwidth if +the mogstored is not used or if mogstored_stream_port is +not configured correctly. Using Perlbal to 1.80 or later for HTTP +will also speed up checksum verification during replication using +the Content-MD5 HTTP header. 
+ +=head1 ALGORITHMS + +While we can easily enable some or all of the SHA family of hash +algorithms, they all perform worse than MD5. Checksums are intended to +detect unintentional corruption due to hardware/software errors, not +malicious data replacement. Since MogileFS does not implement any +security/authorization on its own to protect against malicious use, the +use of checksums in MogileFS to protect against malicious data +replacement is misguided. + +If you have a use case which requires a stronger hash algorithm, +please speak up on the mailing list at L. diff --git a/lib/MogileFS/Class.pm b/lib/MogileFS/Class.pm index a35445ae..ad5a9acb 100644 --- a/lib/MogileFS/Class.pm +++ b/lib/MogileFS/Class.pm @@ -1,245 +1,67 @@ package MogileFS::Class; use strict; +use warnings; +use MogileFS::Util qw(throw); +use MogileFS::Checksum; -my %singleton; # dmid -> classid -> MogileFS::Class -my $last_load = 0; +=head1 -# return MogileFS::Class object for a given fid id/obj -sub of_fid { - my ($pkg, $fid) = @_; - return undef unless $fid; - # make $fid into a FID object: - $fid = MogileFS::FID->new($fid) unless ref $fid; - return undef unless $fid->exists; - my $cl = $pkg->of_dmid_classid($fid->dmid, $fid->classid); - return $cl if $cl; - # return the default class for this file, not undef. this should - # always return a valid class for a valid FID. files need to - # always have a mindevcount (default of 2), repl policy, etc. - return $pkg->of_dmid_classid($fid->dmid, 0); -} - -# return MogileFS::Class, given a dmid and classid. or returns the -# default class, if classid is bogus. 
-sub of_dmid_classid { - my ($pkg, $dmid, $classid) = @_; - return $singleton{$dmid}{$classid} if - $singleton{$dmid} && - $singleton{$dmid}{$classid} && - $singleton{$dmid}{$classid}->{_loaded}; - $pkg->reload_classes; - return $singleton{$dmid}{$classid} if - $singleton{$dmid} && - $singleton{$dmid}{$classid}; - return undef; -} - -# marks everything dirty, triggering a reload, but doesn't actually -# reload now. will happen later, next time somebody loads something. -sub invalidate_cache { - my $pkg = shift; - $last_load = 0; - $pkg->_foreach_singleton(sub { - my $cl = shift; - $cl->{_loaded} = 0; - }); - if (my $worker = MogileFS::ProcManager->is_child) { - $worker->invalidate_meta("class"); - } -} - -sub check_cache { - my $pkg = shift; - my $now = time(); - return if $last_load > $now - 5; - MogileFS::Class->reload_classes; -} - -sub reload_classes { - my $pkg = shift; - my $now = time(); - - # mark everything as invalid for now - $pkg->_foreach_singleton(sub { - my ($cl, $dmid, $clid) = @_; - $cl->{_loaded} = 0; - }); - - # install the default classes (classid=0) - my $default_min = MogileFS->config('default_mindevcount'); - foreach my $dom (MogileFS::Domain->domains) { - my $dmid = $dom->id; - my $cl = - ($singleton{$dmid}{0} = - bless { - dmid => $dmid, - classid => 0, - name => "default", - mindevcount => $default_min, - }, $pkg); - $cl->{_loaded} = 1; - } - - foreach my $row (Mgd::get_store()->get_all_classes) { - my $cl = - ($singleton{$row->{dmid}}{$row->{classid}} = - bless { - dmid => $row->{dmid}, - classid => $row->{classid}, - name => $row->{classname}, - mindevcount => $row->{mindevcount}, - replpolicy => $row->{replpolicy}, - }, $pkg); - $cl->{_loaded} = 1; - } - - # delete any singletons that weren't just loaded - $pkg->_foreach_singleton(sub { - my ($cl, $dmid, $clid) = @_; - return if $cl->{_loaded}; - delete $singleton{$dmid}{$clid}; - }); - - $last_load = $now; -} - -# enumerates all loaded singletons (without reloading/checked caches), -# 
calling the given subref with (MogileFS::Class, $dmid, $classid) -sub _foreach_singleton { - my ($pkg, $cb) = @_; - foreach my $dmid (keys %singleton) { - foreach my $clid (keys %{$singleton{$dmid}}) { - $cb->($singleton{$dmid}{$clid}, $dmid, $clid); - } - } -} +MogileFS::Class - Class class. -# enumerates all classes, (reloading if needed), calling the given -# subref with (MogileFS::Class, $dmid, $classid) -sub foreach { - my ($pkg, $cb) = @_; - $pkg->check_cache; - $pkg->_foreach_singleton($cb); -} +=cut -sub class_name { - my ($pkg, $dmid, $classid) = @_; - my $cls = $pkg->of_dmid_classid($dmid, $classid) - or return undef; - return $cls->name; +sub new_from_args { + my ($class, $args, $domain_factory) = @_; + return bless { + domain_factory => $domain_factory, + mindevcount => 2, + %{$args}, + }, $class; } -sub class_id { - my ($pkg, $dmid, $classname) = @_; - return undef unless $dmid > 0 && length $classname; - # tries to get it first from cache, then reloads and tries again. - my $get = sub { - foreach my $cl ($pkg->classes_of_domain($dmid)) { - return $cl->classid if $cl->name eq $classname; - } - return undef; - }; - my $id = $get->(); - return $id if $id; - MogileFS::Class->reload_classes; - return $get->(); -} +# Instance methods: -sub classes_of_domain { - my ($pkg, $doma) = @_; - my $dmid = ref $doma ? $doma->id : $doma; - $pkg->check_cache; - return () unless $dmid && $singleton{$dmid}; - return values %{ $singleton{$dmid} }; -} +sub id { $_[0]{classid} } +sub name { $_[0]{classname} } +sub mindevcount { $_[0]{mindevcount} } +sub dmid { $_[0]{dmid} } +sub hashtype { $_[0]{hashtype} } +sub hashname { $MogileFS::Checksum::TYPE2NAME{$_[0]{hashtype}} } -# throws 'dup' on duplicate name, returns class otherwise -sub create_class { - my ($pkg, $dom, $clname) = @_; - my $clid = Mgd::get_store()->create_class($dom->id, $clname); - return $pkg->of_dmid_classid($dom->id, $clid); +sub hashtype_string { + my $self = shift; + $self->hashtype ? 
$self->hashname : "NONE"; } -# -------------------------------------------------------------------------- -# Instance methods: -# -------------------------------------------------------------------------- - -sub domainid { $_[0]{dmid} } -sub classid { $_[0]{classid} } -sub mindevcount { $_[0]{mindevcount} } - sub repl_policy_string { my $self = shift; - # if they've actually configured one, it gets used: - return $self->{replpolicy} if $self->{replpolicy}; - # else, the historical default: - return "MultipleHosts()"; + return $self->{replpolicy} ? $self->{replpolicy} + : 'MultipleHosts()'; } sub repl_policy_obj { my $self = shift; - return $self->{_repl_policy_obj} if $self->{_repl_policy_obj}; - my $polstr = $self->repl_policy_string; - # parses it: - my $pol = MogileFS::ReplicationPolicy->new_from_policy_string($polstr); - return $self->{_repl_policy_obj} = $pol; + if (! $self->{_repl_policy_obj}) { + my $polstr = $self->repl_policy_string; + # Parses the string. + $self->{_repl_policy_obj} = + MogileFS::ReplicationPolicy->new_from_policy_string($polstr); + } + return $self->{_repl_policy_obj}; } -sub name { $_[0]{name} } - sub domain { my $self = shift; - return MogileFS::Domain->of_dmid($self->domainid); + return $self->{domain_factory}->get_by_id($self->{dmid}); } -# throws 'dup' (for name conflict), returns 1 otherwise -sub set_name { - my ($self, $name) = @_; - return 1 if $self->name eq $name; - Mgd::get_store()->update_class_name(dmid => $self->domainid, - classid => $self->classid, - classname => $name); - $self->{name} = $name; - MogileFS::Class->invalidate_cache; - return 1; -} - -sub set_mindevcount { - my ($self, $n) = @_; - return 1 if $self->mindevcount == $n; - Mgd::get_store()->update_class_mindevcount(dmid => $self->domainid, - classid => $self->classid, - mindevcount => $n); - $self->{mindevcount} = $n; - MogileFS::Class->invalidate_cache; - return 1; -} - -sub set_replpolicy { - my ($self, $pol) = @_; - return 1 if $self->repl_policy_string eq 
$pol; - Mgd::get_store()->update_class_replpolicy(dmid => $self->domainid, - classid => $self->classid, - replpolicy => $pol); - $self->{replpolicy} = $pol; - MogileFS::Class->invalidate_cache; - return 1; - -} - -# throws: -# 'has_files' -sub delete { +sub has_files { my $self = shift; - throw("has_files") if $self->has_files; - Mgd::get_store()->delete_class($self->domainid, $self->classid); - MogileFS::Class->invalidate_cache; - return 1; + return Mgd::get_store()->class_has_files($self->{dmid}, $self->id); } -sub has_files { - my $self = shift; - return Mgd::get_store()->class_has_files($self->domainid, $self->classid); +sub observed_fields { + return {}; } 1; diff --git a/lib/MogileFS/Config.pm b/lib/MogileFS/Config.pm index 1861910a..4e3e6691 100644 --- a/lib/MogileFS/Config.pm +++ b/lib/MogileFS/Config.pm @@ -20,6 +20,8 @@ use constant REBAL_QUEUE => 2; use constant DEVICE_SUMMARY_CACHE_TIMEOUT => 15; my %conf; +my %server_settings; +my $has_cached_settings = 0; sub set_config { shift if @_ == 3; my ($k, $v) = @_; @@ -27,7 +29,7 @@ sub set_config { # if a child, propagate to parent if (my $worker = MogileFS::ProcManager->is_child) { $worker->send_to_parent(":set_config_from_child $k $v"); - } else { + } elsif (defined $v) { MogileFS::ProcManager->send_to_all_children(":set_config_from_parent $k $v"); } @@ -41,6 +43,7 @@ sub set_config_no_broadcast { } set_config('default_mindevcount', 2); +set_config('min_fidid', 0); our ( %cmdline, @@ -59,11 +62,13 @@ our ( $fsck_jobs, $reaper_jobs, $monitor_jobs, + $job_master, # boolean $max_handles, $min_free_space, $max_disk_age, $node_timeout, # time in seconds to wait for storage node responses $conn_timeout, # time in seconds to wait for connection to storage node + $conn_pool_size, # size of the HTTP connection pool $pidfile, $repl_use_get_port, $local_network, @@ -95,6 +100,7 @@ sub load_config { 'default_mindevcount=i' => \$cmdline{default_mindevcount}, 'node_timeout=i' => \$cmdline{node_timeout}, 'conn_timeout=i' 
=> \$cmdline{conn_timeout}, + 'conn_pool_size=i' => \$cmdline{conn_pool_size}, 'max_handles=i' => \$cmdline{max_handles}, 'pidfile=s' => \$cmdline{pidfile}, 'no_schema_check' => \$cmdline{no_schema_check}, @@ -102,6 +108,7 @@ sub load_config { 'repl_use_get_port=i' => \$cmdline{repl_use_get_port}, 'local_network=s' => \$cmdline{local_network}, 'mogstored_stream_port' => \$cmdline{mogstored_stream_port}, + 'job_master!' => \$cmdline{job_master}, ); # warn of old/deprecated options @@ -151,6 +158,7 @@ sub load_config { $replicate_jobs = choose_value( 'replicate_jobs', 1 ); $fsck_jobs = choose_value( 'fsck_jobs', 1 ); $reaper_jobs = choose_value( 'reaper_jobs', 1 ); + $job_master = choose_value( 'job_master', 1 ); $monitor_jobs = choose_value( 'monitor_jobs', 1 ); $min_free_space = choose_value( 'min_free_space', 100 ); $max_disk_age = choose_value( 'max_disk_age', 5 ); @@ -162,6 +170,7 @@ sub load_config { choose_value( 'default_mindevcount', 2 ); $node_timeout = choose_value( 'node_timeout', 2 ); $conn_timeout = choose_value( 'conn_timeout', 2 ); + $conn_pool_size = choose_value( 'conn_pool_size', 20 ); choose_value( 'rebalance_ignore_missing', 0 ); $repl_use_get_port = choose_value( 'repl_use_get_port', 0 ); @@ -235,6 +244,17 @@ Details: [sto=$sto, err=$@] } $sto->pre_daemonize_checks; + + # If MySQL gets restarted InnoDB may reset its auto_increment counter. If + # the first few fids have been deleted, the "reset to max on duplicate" + # code won't fire immediately. + # Instead, we also trigger it if a configured "min_fidid" is higher than + # what we got from innodb. + # For bonus points: This value should be periodically updated, in case the + # trackers don't go down as often as the database. 
+ my $min_fidid = $sto->max_fidid; + $min_fidid = 0 unless $min_fidid; + set_config('min_fidid', $min_fidid); } # set_server_setting( key, value ) @@ -256,9 +276,23 @@ sub server_setting { return Mgd::get_store()->server_setting($key); } +sub cache_server_setting { + my ($class, $key, $val) = @_; + $has_cached_settings++ unless $has_cached_settings; + if (! defined $val) { + delete $server_settings{$key} + if exists $server_settings{$key}; + } + $server_settings{$key} = $val; +} + sub server_setting_cached { - my ($class, $key, $timeout) = @_; - return Mgd::get_store()->server_setting_cached($key, $timeout); + my ($class, $key, $fallback) = @_; + $fallback = 1 unless (defined $fallback); + if (!$has_cached_settings && $fallback) { + return MogileFS::Config->server_setting($key); + } + return $server_settings{$key}; } my $memc; @@ -266,16 +300,19 @@ my $last_memc_server_fetch = 0; my $have_memc_module = eval "use Cache::Memcached; 1;"; sub memcache_client { return undef unless $have_memc_module; - $memc ||= Cache::Memcached->new; # only reload the server list every 30 seconds my $now = time(); return $memc if $last_memc_server_fetch > $now - 30; - my @servers = split(/\s*,\s*/, MogileFS::Config->server_setting("memcache_servers") || ""); - $memc->set_servers(\@servers); + my @servers = grep(/:\d+$/, split(/\s*,\s*/, MogileFS::Config->server_setting_cached("memcache_servers") || "")); $last_memc_server_fetch = $now; + return ($memc = undef) unless @servers; + + $memc ||= Cache::Memcached->new; + $memc->set_servers(\@servers); + return $memc; } @@ -286,6 +323,7 @@ sub hostname { sub server_setting_is_readable { my ($class, $key) = @_; + return 1 if $key eq 'fsck_checksum'; return 0 if $key =~ /^fsck_/; return 1; } @@ -333,26 +371,30 @@ sub server_setting_is_writable { # let slave settings go through unmodified, for now. 
if ($key =~ /^slave_/) { return $del_if_blank }; - if ($key eq "enable_rebalance") { return $bool }; + if ($key eq "skip_devcount") { return $bool }; + if ($key eq "skip_mkcol") { return $bool }; + if ($key eq "case_sensitive_list_keys") { return $bool }; if ($key eq "memcache_servers") { return $any }; + if ($key eq "memcache_ttl") { return $num }; if ($key eq "internal_queue_limit") { return $num }; # ReplicationPolicy::MultipleNetworks if ($key eq 'network_zones') { return $any }; if ($key =~ /^zone_/) { return $valid_netmask_list }; - if ($key eq "rebalance_policy") { return sub { - my $v = shift; - return undef unless $v; - # TODO: actually load the provided class and test if it loads? - die "Doesn't match acceptable format" unless - $v =~ /^[\w:\-]+$/; - return $v; - }} - # should probably restrict to (\d+) if ($key =~ /^queue_/) { return $any }; + if ($key eq "fsck_checksum") { + return sub { + my $v = shift; + return "off" if $v eq "off"; + return undef if $v eq "class"; + return $v if MogileFS::Checksum->valid_alg($v); + die "Not a valid checksum algorithm"; + } + } + return 0; } diff --git a/lib/MogileFS/Connection/Client.pm b/lib/MogileFS/Connection/Client.pm index f4a36eb6..a4981be9 100644 --- a/lib/MogileFS/Connection/Client.pm +++ b/lib/MogileFS/Connection/Client.pm @@ -7,29 +7,101 @@ package MogileFS::Connection::Client; use strict; use Danga::Socket (); use base qw{Danga::Socket}; +use IO::Handle; +use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC); +use MogileFS::Util qw(error); -use fields qw{read_buf}; +use fields qw{read_buf pipelined}; + +my %SLOW_WRITERS = (); +my $EXPTIME = 120; +my $PIPELINE = []; + +sub Reset { + %SLOW_WRITERS = (); + $PIPELINE = []; +} + +sub WriterWatchDog { + my $dmap = Danga::Socket->DescriptorMap; + my $old = clock_gettime(CLOCK_MONOTONIC) - $EXPTIME; + foreach my $fd (keys %SLOW_WRITERS) { + my $last_write = $SLOW_WRITERS{$fd}; + next if $last_write > $old; + + if (my $ds = $dmap->{$fd}) { + error('write timeout 
expired: '.$ds->as_string); + $ds->close; + } else { + error("fd=$fd not known to Danga::Socket(!?), ignoring"); + } + delete $SLOW_WRITERS{$fd}; + } +} + +sub ProcessPipelined { + my $run = $PIPELINE; + $PIPELINE = []; + foreach my MogileFS::Connection::Client $clref (@$run) { + $clref->{pipelined} = undef; + $clref->process_request or $clref->watch_read(1); + } +} sub new { my $self = shift; $self = fields::new($self) unless ref $self; $self->SUPER::new( @_ ); + IO::Handle::blocking($self->{sock}, 0); + delete $SLOW_WRITERS{$self->{fd}}; $self->watch_read(1); return $self; } # Client + +sub process_request { + my MogileFS::Connection::Client $self = shift; + + while ($self->{read_buf} =~ s/^(.*?)\r?\n//) { + next unless length $1; + $self->handle_request($1); + return 1; + } + 0; +} + sub event_read { my MogileFS::Connection::Client $self = shift; my $bref = $self->read(1024); return $self->close unless defined $bref; $self->{read_buf} .= $$bref; + if ($self->process_request) { + $self->watch_read(0); + } +} - while ($self->{read_buf} =~ s/^(.*?)\r?\n//) { - next unless length $1; - $self->handle_request($1); +sub write { + my MogileFS::Connection::Client $self = shift; + my $done = $self->SUPER::write(@_); + my $fd = $self->{fd}; + if ($done) { + if (defined $fd) { + delete $SLOW_WRITERS{$fd}; + unless ($self->{pipelined}) { + $self->{pipelined} = 1; + push @$PIPELINE, $self; + } + } + } else { + # stop reading if we can't write, otherwise we'll OOM + if (defined $fd) { + $SLOW_WRITERS{$fd} = clock_gettime(CLOCK_MONOTONIC); + $self->watch_read(0); + } } + $done; } sub handle_request { @@ -88,12 +160,16 @@ sub handle_admin_command { $count = 500 if $count > 500; # now make sure it's a real job - if (MogileFS::ProcManager->is_valid_job($job)) { - MogileFS::ProcManager->request_job_process($job, $count); - push @out, "Now desiring $count children doing '$job'."; + if (MogileFS::ProcManager->is_monitor_good) { + if (MogileFS::ProcManager->is_valid_job($job)) { + 
MogileFS::ProcManager->request_job_process($job, $count); + push @out, "Now desiring $count children doing '$job'."; + } else { + my $classes = join(", ", MogileFS::ProcManager->valid_jobs); + push @out, "ERROR: Invalid class '$job'. Valid classes: $classes"; + } } else { - my $classes = join(", ", MogileFS::ProcManager->valid_jobs); - push @out, "ERROR: Invalid class '$job'. Valid classes: $classes"; + push @out, "ERROR: Monitor has not completed initial run yet\n"; } } else { push @out, "ERROR: usage: !want "; @@ -136,8 +212,8 @@ sub handle_admin_command { MogileFS::ProcManager->SendHelp($self, $args); } - $self->write(join("\r\n", @out) . "\r\n") if @out; - $self->write(".\r\n"); + push @out, '.', ''; + $self->write(join("\r\n", @out)); return; } diff --git a/lib/MogileFS/Connection/HTTP.pm b/lib/MogileFS/Connection/HTTP.pm new file mode 100644 index 00000000..3ac75412 --- /dev/null +++ b/lib/MogileFS/Connection/HTTP.pm @@ -0,0 +1,270 @@ +package MogileFS::Connection::HTTP; +use strict; +use warnings; +use MogileFS::Connection::Poolable; +use HTTP::Response; +use base qw(MogileFS::Connection::Poolable); +use MogileFS::Util qw/debug/; + +use fields ( + 'read_size_hint', # bytes to read for body + 'http_response', # HTTP::Response object + 'http_req', # HTTP request ("GET $URL") + 'http_res_cb', # called on HTTP::Response (_after_ body is read) + 'http_res_body_read',# number of bytes read in the response body + 'http_res_content_cb' # filter for the response body (success-only) +); +use Net::HTTP::NB; + +sub new { + my ($self, $ip, $port) = @_; + my %opts = ( Host => "$ip:$port", Blocking => 0, KeepAlive => 300 ); + my $sock = Net::HTTP::NB->new(%opts) or return; + + $self = fields::new($self) unless ref $self; + $self->SUPER::new($sock, $ip, $port); # MogileFS::Connection::Poolable->new + + return $self; +} + +# starts an HTTP request, returns immediately and relies on Danga::Socket +# to schedule the run the callback. 
+sub start { + my ($self, $method, $path, $opts, $http_res_cb) = @_; + $opts ||= {}; + + my $err = delete $self->{mfs_err}; + return $self->err_response($err, $http_res_cb) if $err; + + $self->{http_res_cb} = $http_res_cb; + $self->{http_res_content_cb} = $opts->{content_cb}; + $self->{read_size_hint} = $opts->{read_size_hint} || 4096; + + my $h = $opts->{headers} || {}; + $h->{'User-Agent'} = ref($self) . "/$MogileFS::Server::VERSION"; + my $content = $opts->{content}; + if (defined $content) { + # Net::HTTP::NB->format_request will set Content-Length for us + $h->{'Content-Type'} = 'application/octet-stream' + } else { + $content = ""; + } + + # support full URLs for LWP compatibility + # some HTTP daemons don't support Absolute-URIs, so we only give + # them the HTTP/1.0-compatible path + if ($path =~ m{\Ahttps?://[^/]+(/.*)\z}) { + $path = $1; + } + + $self->set_timeout("node_timeout"); + + # Force HTTP/1.0 to avoid potential chunked responses and force server + # to set Content-Length: instead. In practice, we'll never get chunked + # responses anyways as all known DAV servers will set Content-Length + # for static files... + $self->sock->http_version($method eq "GET" ? "1.0" : "1.1"); + $h->{Connection} = "keep-alive"; + + # format the request here since it sets the reader up to read + my $req = $self->sock->format_request($method, $path, %$h, $content); + $self->{http_req} = "$method http://" . $self->key . 
$path; + + # we'll start watching for writes here since it's unlikely the + # 3-way handshake for new TCP connections is done at this point + $self->write($req); + + # start reading once we're done writing + $self->write(sub { + # we're connected after writing $req is successful, so + # change the timeout and wait for readability + $self->set_timeout("node_timeout"); + $self->watch_read(1); + }); +} + +# called by Danga::Socket upon readability +sub event_read { + my ($self) = @_; + + my $content_cb = $self->{http_res_content_cb}; + my Net::HTTP::NB $sock = $self->sock; + my $res = $self->{http_response}; + + # read and cache HTTP response headers + unless ($res) { + my ($code, $mess, @headers) = eval { $sock->read_response_headers }; + + # wait for readability on EAGAIN + unless (defined $code) { + my $err = $@; + if ($err) { + $err =~ s/ at .*\z//s; # do not expose source file location + $err =~ s/\r?\n/\\n/g; # just in case + return $self->err("read_response_headers: $err"); + } + + # assume EAGAIN, though $! gets clobbered by Net::HTTP::* + return; + } + + # hold onto response object until the response body is processed + $res = HTTP::Response->new($code, $mess, \@headers, ""); + $res->protocol("HTTP/" . $sock->peer_http_version); + $self->{http_response} = $res; + $self->{http_res_body_read} = $content_cb ? 0 : undef; + } + + my $body_read = sub { + $content_cb ? 
$self->{http_res_body_read} : length($res->content); + }; + + # continue reading the response body if we have a header + my $rsize = $self->{read_size_hint}; + my $buf; + + my $clen = $res->header("Content-Length"); + while (1) { + my $n = $sock->read_entity_body($buf, $rsize); + if (!defined $n) { + if ($!{EAGAIN}) { + # workaround a bug in Net::HTTP::NB + # ref: https://rt.cpan.org/Ticket/Display.html?id=78233 + if (defined($clen) && $clen == $body_read->()) { + return $self->_http_done; + } + + # reset the timeout if we got any body bytes + $self->set_timeout("node_timeout"); + return; + } + next if $!{EINTR}; + return $self->err("read_entity_body: $!"); + } + if ($n == 0) { + # EOF, call the response header callback + return $self->_http_done; + } + if ($n > 0) { + if ($content_cb && $res->is_success) { + $self->{http_res_body_read} += length($buf); + + # filter the buffer through content_cb, no buffering. + # This will be used by tracker-side checksumming + # replicate does NOT use this code path for performance + # reasons (tracker-side checksumming is already a slow path, + # so there's little point in optimizing). + # $buf may be empty on EOF (n == 0) + $content_cb->($buf, $self, $res); + + if (defined($clen) && $clen == $body_read->()) { + return $self->_http_done; + } + } else { + # append to existing buffer, this is only used for + # PUT/DELETE/HEAD and small GET responses (monitor) + $res->content($res->content . $buf); + } + # continue looping until EAGAIN or EOF (n == 0) + } + } +} + +# this does cleanup as an extra paranoid step to prevent circular refs +sub close { + my ($self, $close_reason) = @_; + + delete $self->{http_res_cb}; + delete $self->{http_res_content_cb}; + + $self->SUPER::close($close_reason); # MogileFS::Connection::Poolable->close +} + +# This is only called on a socket-level error (e.g. 
disconnect, timeout) +# bad server responses (500, 403) do not trigger this +sub err { + my ($self, $reason) = @_; + + # Fake an HTTP response like LWP does on errors. + # delete prevents http_res_cb from being invoked twice, as event_read + # will delete http_res_cb on success, too + my $http_res_cb = delete $self->{http_res_cb}; + + # don't retry if we already got a response header nor if we got a timeout + if ($self->retryable($reason) && $http_res_cb && !$self->{http_response}) { + # do not call inflight_expire here, since we need inflight_cb + # for retrying + + $self->close(":retry"); # trigger a retry in MogileFS::ConnectionPool + } else { + # ensure we don't call new_err on close() + $self->inflight_expire; + + # free the FD before invoking the callback + $self->close($reason); + $self->err_response($reason, $http_res_cb) if $http_res_cb; + } +} + +# Fakes an HTTP response like LWP does on errors. +sub err_response { + my ($self, $err, $http_res_cb) = @_; + + my $res = HTTP::Response->new(500, $err); + $err ||= "(unspecified error)"; + my $req = $self->{http_req} || "no HTTP request made"; + Mgd::error("$err: $req"); + $res->header("X-MFS-Error", $err); + $res->protocol("HTTP/1.0"); + $http_res_cb->($res); +} + +# returns true if the HTTP connection is persistent/reusable, false if not. +sub _http_persistent { + my ($self, $res) = @_; + + # determine if this connection is reusable: + my $connection = $res->header("Connection"); + my $persist; + + # Connection: header takes precedence over protocol version + if ($connection) { + if ($connection =~ /\bkeep-alive\b/i) { + $persist = 1; + } elsif ($connection =~ /\bclose\b/i) { + $persist = 0; + } + + # if we can't make sense of the Connection: header, fall through + # and decided based on protocol version + } + + # HTTP/1.1 is persistent-by-default, HTTP/1.0 is not. + # Will there be HTTP/1.2? 
+ $persist = $res->protocol eq "HTTP/1.1" unless defined $persist; + + # we're not persistent if the pool is full, either + return ($persist && $self->persist); +} + +# Called on successfully read HTTP response (it could be a server-side +# error (404,403,500...), but not a socket error between client<->server). +sub _http_done { + my ($self) = @_; + + # delete ensures we only fire the callback once + my $http_res_cb = delete $self->{http_res_cb}; + my $res = delete $self->{http_response}; + delete $self->{http_req}; + + # ensure we don't call new_err on eventual close() + $self->inflight_expire; + + # free up the FD if possible + $self->close('http_close') unless $self->_http_persistent($res); + + # finally, invoke the user-supplied callback + $http_res_cb->($res); +} + +1; diff --git a/lib/MogileFS/Connection/Mogstored.pm b/lib/MogileFS/Connection/Mogstored.pm index d7f52cb1..c071389b 100644 --- a/lib/MogileFS/Connection/Mogstored.pm +++ b/lib/MogileFS/Connection/Mogstored.pm @@ -1,6 +1,7 @@ package MogileFS::Connection::Mogstored; use strict; use IO::Socket::INET; +use Socket qw(SO_KEEPALIVE); sub new { my ($class, $ip, $port) = @_; @@ -15,13 +16,10 @@ sub new { sub sock { my ($self, $timeout) = @_; return $self->{sock} if $self->{sock}; - return $self->{sock} = IO::Socket::INET->new(PeerAddr => $self->{ip}, - PeerPort => $self->{port}, - Timeout => $timeout); -} - -sub sock_if_connected { - my $self = shift; + $self->{sock} = IO::Socket::INET->new(PeerAddr => $self->{ip}, + PeerPort => $self->{port}, + Timeout => $timeout) or die "Could not connect to mogstored on ".$self->{ip}.":".$self->{port}; + $self->{sock}->sockopt(SO_KEEPALIVE, 1); return $self->{sock}; } diff --git a/lib/MogileFS/Connection/Parent.pm b/lib/MogileFS/Connection/Parent.pm new file mode 100644 index 00000000..d28ffa65 --- /dev/null +++ b/lib/MogileFS/Connection/Parent.pm @@ -0,0 +1,35 @@ +package MogileFS::Connection::Parent; +# maintains a connection in a worker process to the parent 
ProcManager process +# Only used by workers that use the Danga::Socket->EventLoop internally +# currently only Monitor +use warnings; +use strict; +use Danga::Socket (); +use base qw{Danga::Socket}; +use fields qw(worker); + +sub new { + my ($self, $worker) = @_; + $self = fields::new($self) unless ref $self; + $self->SUPER::new($worker->psock); + $self->{worker} = $worker; + + return $self; +} + +sub ping { + my ($self) = @_; + + $self->write(":ping\r\n"); +} + +sub event_read { + my ($self) = @_; + + $self->{worker}->read_from_parent; +} + +sub event_hup { $_[0]->close } +sub event_err { $_[0]->close } + +1; diff --git a/lib/MogileFS/Connection/Poolable.pm b/lib/MogileFS/Connection/Poolable.pm new file mode 100644 index 00000000..86b99d48 --- /dev/null +++ b/lib/MogileFS/Connection/Poolable.pm @@ -0,0 +1,241 @@ +# private base class for poolable HTTP/Mogstored sidechannel connections +# This is currently only used by HTTP, but is intended for Mogstored +# connections, too. +package MogileFS::Connection::Poolable; +use strict; +use warnings; +use Danga::Socket; +use base qw(Danga::Socket); +use fields ( + 'mfs_pool', # owner of the connection (MogileFS::ConnectionPool) + 'mfs_hostport', # [ ip, port ] + 'mfs_expire', # Danga::Socket::Timer object + 'mfs_expire_cb', # Danga::Socket::Timer callback + 'mfs_requests', # number of requests made on this object + 'mfs_err', # used to propagate an error to start() + 'mfs_writeq', # arrayref if connecting, undef otherwise +); +use Socket qw(SO_KEEPALIVE); +use Time::HiRes; + +# subclasses (MogileFS::Connection::{HTTP,Mogstored}) must call this sub +sub new { + my ($self, $sock, $ip, $port) = @_; + $self->SUPER::new($sock); # Danga::Socket->new + + # connection may not be established, yet + # so Danga::Socket->peer_addr_string can't be used here + $self->{mfs_hostport} = [ $ip, $port ]; + $self->{mfs_requests} = 0; + + # newly-created socket, we buffer writes until event_write is triggered + $self->{mfs_writeq} = []; + + 
return $self; +} + +# used by ConnectionPool for tracking per-hostport connection counts +sub key { join(':', @{$_[0]->{mfs_hostport}}); } + +# backwards compatibility +sub host_port { $_[0]->key; } + +sub ip_port { @{$_[0]->{mfs_hostport}}; } + +sub fd { fileno($_[0]->sock); } + +# marks a connection as idle, call this before putting it in a connection +# pool for eventual reuse. +sub mark_idle { + my ($self) = @_; + + $self->watch_read(0); + + # set the keepalive flag the first time we're idle + $self->sock->sockopt(SO_KEEPALIVE, 1) if $self->{mfs_requests} == 0; + + $self->{mfs_requests}++; +} + +sub write { + my ($self, $arg) = @_; + my $writeq = $self->{mfs_writeq}; + + if (ref($writeq) eq "ARRAY") { + # if we're still connecting, we must buffer explicitly for *BSD + # and not attempt a real write() until event_write is triggered + push @$writeq, $arg; + $self->watch_write(1); # enable event_write triggering + 0; # match Danga::Socket::write return value + } else { + $self->SUPER::write($arg); + } +} + +# Danga::Socket will trigger this when a socket is writable +sub event_write { + my ($self) = @_; + + # we may have buffered writes in mfs_writeq during non-blocking connect(), + # this is needed on *BSD but unnecessary (but harmless) on Linux. + my $writeq = delete $self->{mfs_writeq}; + if ($writeq) { + $self->watch_write(0); # ->write will re-enable if needed + foreach my $queued (@$writeq) { + $self->write($queued); + } + } else { + $self->SUPER::event_write(); + } +} + +# the request running on this connection is retryable if this socket +# has ever been marked idle. The connection pool can never be 100% +# reliable for detecting dead sockets, and all HTTP requests made by +# MogileFS are idempotent. 
+sub retryable { + my ($self, $reason) = @_; + return ($reason !~ /timeout/ && $self->{mfs_requests} > 0); +} + +# Sets (or updates) the timeout of the connection +# timeout_key is "node_timeout" or "conn_timeout" +# clears the current timeout if timeout_key is undef +sub set_timeout { + my ($self, $timeout_key) = @_; + my $mfs_pool = $self->{mfs_pool}; + + $self->SetPostLoopCallback(undef); + if ($timeout_key) { + my $timeout; + + if ($timeout_key =~ /\A[a-z_]+\z/) { + $timeout = MogileFS->config($timeout_key) || 2; + } else { + $timeout = $timeout_key; + $timeout_key = "timeout"; + } + + my $t0 = Time::HiRes::time(); + $self->{mfs_expire} = $t0 + $timeout; + $self->{mfs_expire_cb} = sub { + my ($now) = @_; + my $elapsed = $now - $t0; + + # for HTTP, this will fake an HTTP error response like LWP does + $self->err("$timeout_key: $timeout (elapsed: $elapsed)"); + }; + $mfs_pool->register_timeout($self, $timeout) if $mfs_pool; + } else { + $self->{mfs_expire} = $self->{mfs_expire_cb} = undef; + $mfs_pool->register_timeout($self, undef) if $mfs_pool; + } +} + +# returns the expiry time of the connection +sub expiry { $_[0]->{mfs_expire} } + +# runs expiry callback and returns true if time is up, +# returns false if there is time remaining +sub expired { + my ($self, $now) = @_; + my $expire = $self->{mfs_expire} or return 0; + $now ||= Time::HiRes::time(); + + if ($now >= $expire) { + my $expire_cb = delete $self->{mfs_expire_cb}; + if ($expire_cb && $self->sock) { + $self->SetPostLoopCallback(sub { $expire_cb->($now); 1 }); + } + return 1; + } + return 0; +} + +# may be overriden in subclass, called only on errors +# The HTTP version of this will fake an HTTP response for LWP compatibility +sub err { + my ($self, $close_reason) = @_; + + $self->inflight_expire; # ensure we don't call new_err on eventual close() + + if ($close_reason =~ /\A:event_(?:hup|err)\z/) { + # there's a chance this can be invoked while inflight, + # conn_drop will handle this case 
appropriately + $self->{mfs_pool}->conn_drop($self, $close_reason) if $self->{mfs_pool}; + } else { + $self->close($close_reason); + } +} + +# sets the pool this connection belongs to, only call from ConnectionPool +sub set_pool { + my ($self, $pool) = @_; + + $self->{mfs_pool} = $pool; +} + +# closes a connection, and may reschedule the inflight callback if +# close_reason is ":retry" +sub close { + my ($self, $close_reason) = @_; + + delete $self->{mfs_expire_cb}; # avoid circular ref + + my $mfs_pool = delete $self->{mfs_pool}; # avoid circular ref + my $inflight_cb; + + if ($mfs_pool) { + $mfs_pool->schedule_queued; + $inflight_cb = $mfs_pool->conn_close_prepare($self, $close_reason); + } + $self->SUPER::close($close_reason); # Danga::Socket->close + + if ($inflight_cb && $close_reason) { + if ($close_reason eq ":retry") { + my ($ip, $port) = $self->ip_port; + + $mfs_pool->enqueue($ip, $port, $inflight_cb); + } else { + # Danga::Socket-scheduled write()s which fail with ECONNREFUSED, + # EPIPE, or "write_error" after an initial (non-blocking) + # connect() + $mfs_pool->on_next_tick(sub { + ref($self)->new_err($close_reason || "error", $inflight_cb); + }); + } + } +} + +# Marks a connection as no-longer inflight. Calling this prevents retries. +sub inflight_expire { + my ($self) = @_; + my $mfs_pool = $self->{mfs_pool}; + die "BUG: expiring without MogileFS::ConnectionPool\n" unless $mfs_pool; + $mfs_pool->inflight_cb_expire($self); +} + +# Danga::Socket callbacks +sub event_hup { $_[0]->err(':event_hup'); } +sub event_err { $_[0]->err(':event_err'); } + +# called when we couldn't create a socket, but need to create an object +# anyways for errors (creating fake, LWP-style error responses) +sub new_err { + my ($class, $err, $start_cb) = @_; + my $self = fields::new($class); + $self->{mfs_err} = $err; + # on socket errors + $start_cb->($self); +} + +# returns this connection back to its associated pool. 
+# Returns false if not successful (pool is full) +sub persist { + my ($self) = @_; + my $mfs_pool = $self->{mfs_pool}; + + return $mfs_pool ? $mfs_pool->conn_persist($self) : 0; +} + +1; diff --git a/lib/MogileFS/Connection/Worker.pm b/lib/MogileFS/Connection/Worker.pm index 7c053b1c..aef26d97 100644 --- a/lib/MogileFS/Connection/Worker.pm +++ b/lib/MogileFS/Connection/Worker.pm @@ -8,6 +8,7 @@ use base qw{Danga::Socket}; use fields ( 'read_buf', + 'read_size', # bigger for monitor 'job', 'pid', 'reqid', @@ -27,6 +28,7 @@ sub new { $self->{job} = undef; $self->{last_alive} = time(); $self->{known_state} = {}; + $self->{read_size} = 1024; return $self; } @@ -50,7 +52,7 @@ sub event_read { # if we read data from it, it's not blocked on something else. $self->note_alive; - my $bref = $self->read(1024); + my $bref = $self->read($self->{read_size}); return $self->close() unless defined $bref; $self->{read_buf} .= $$bref; @@ -73,7 +75,11 @@ sub event_write { sub job { my MogileFS::Connection::Worker $self = shift; return $self->{job} unless @_; - return $self->{job} = shift; + my $j = shift; + + # monitor may send huge state events (which we send to everyone else) + $self->{read_size} = Mgd::UNIX_RCVBUF_SIZE() if ($j eq 'monitor'); + $self->{job} = $j; } sub wants_todo { diff --git a/lib/MogileFS/ConnectionPool.pm b/lib/MogileFS/ConnectionPool.pm new file mode 100644 index 00000000..7d4b00f3 --- /dev/null +++ b/lib/MogileFS/ConnectionPool.pm @@ -0,0 +1,483 @@ +# a connection pool class with queueing. +# (something doesn't sound quite right with that...) +# This requires Danga::Socket to drive, but may also function without it +# via conn_get/conn_put. 
+package MogileFS::ConnectionPool; +use strict; +use warnings; +use Carp qw(croak confess); +use Time::HiRes; + +use constant NEVER => (0xffffffff << 32) | 0xffffffff; # portable version :P + +sub new { + my ($class, $conn_class, $opts) = @_; + + $opts ||= {}; + my $self = bless { + fdmap => {}, # { fd -> conn } + idle => {}, # ip:port -> [ MogileFS::Connection::Poolable, ... ] + queue => [], # [ [ ip, port, callback ], ... ] + timer => undef, # Danga::Socket::Timer object + timeouts => {}, # { fd -> conn } + inflight => {}, # ip:port -> { fd -> callback } + total_inflight => 0, # number of inflight connections + dest_capacity => $opts->{dest_capacity}, + total_capacity => $opts->{total_capacity}, + class => $conn_class, + scheduled => 0, # set if we'll start tasks on next tick + on_next_tick => [], + next_expiry => NEVER, + }, $class; + + # total_capacity=20 matches what we used with LWP + $self->{total_capacity} ||= 20; + + # allow users to specify per-destination capacity limits + $self->{dest_capacity} ||= $self->{total_capacity}; + + return $self; +} + +# retrieves an idle connection for the [IP, port] pair +sub _conn_idle_get { + my ($self, $ip, $port) = @_; + + my $key = "$ip:$port"; + my $idle = $self->{idle}->{$key} or return; + + # the Danga::Socket event loop may detect hangups and close sockets, + # However not all MFS workers run this event loop, so we need to + # validate the connection when retrieving a connection from the pool + while (my $conn = pop @$idle) { + # make sure the socket is valid: + + # due to response callback ordering, we actually place connections + # in the pool before invoking the user-supplied response callback + # (to allow connections to get reused ASAP) + my $sock = $conn->sock or next; + + # hope this returns EAGAIN, not using OO->sysread here since + # Net::HTTP::NB overrides that and we _want_ to hit EAGAIN here + my $r = sysread($sock, my $byte, 1); + + # good, connection is possibly still alive if we got EAGAIN + return 
$conn if (!defined $r && $!{EAGAIN}); + + my $err = $!; + if (defined $r) { + if ($r == 0) { + # a common case and to be expected + $err = "server dropped idle connection"; + } else { + # this is a bug either on our side or the HTTP server + Mgd::error("Bug: unexpected got $r bytes from idle conn to ". $conn->host_port. ") (byte=$byte)"); + } + } + + # connection is bad, close the socket and move onto the + # next idle connection if there is one. + $conn->close($err); + } + + return; +} + +# creates a new connection if under capacity +# returns undef if we're at capacity (or on EMFILE/ENFILE) +sub _conn_new_maybe { + my ($self, $ip, $port) = @_; + my $key = "$ip:$port"; + + # we only call this sub if we don't have idle connections, so + # we don't check {idle} here + + # make sure we're not already at capacity for this destination + my $nr_inflight = scalar keys %{$self->{inflight}->{$key} ||= {}}; + return if ($nr_inflight >= $self->{dest_capacity}); + + # see how we're doing with regard to total capacity: + if ($self->_total_connections >= $self->{total_capacity}) { + # see if we have idle connections for other pools to kill + if ($self->{total_inflight} < $self->{total_capacity}) { + # we have idle connections to other destinations, drop one of those + $self->_conn_drop_idle; + # fall through to creating a new connection + } else { + # we're at total capacity for the entire pool + return; + } + } + + # we're hopefully under capacity if we got here, create a new connection + $self->_conn_new($ip, $port); +} + +# creates new connection and registers it in our fdmap +# returns error string if resources (FDs, buffers) aren't available +sub _conn_new { + my ($self, $ip, $port) = @_; + + # calls MogileFS::Connection::{HTTP,Mogstored}->new: + my $conn = $self->{class}->new($ip, $port); + if ($conn) { + # register the connection + $self->{fdmap}->{$conn->fd} = $conn; + $conn->set_pool($self); + + return $conn; + } else { + # EMFILE/ENFILE should never happen as the 
capacity for this + # pool is far under the system defaults. Just give up on + # EMFILE/ENFILE like any other error. + return "failed to create socket to $ip:$port ($!)"; + } +} + +# retrieves a connection, may return undef if at capacity +sub _conn_get { + my ($self, $ip, $port) = @_; + + # if we have idle connections, always use them first + $self->_conn_idle_get($ip, $port) || $self->_conn_new_maybe($ip, $port); +} + +# Pulls a connection out of the pool for synchronous use. +# This may create a new connection (independent of pool limits). +# The connection returned by this is _blocking_. This is currently +# only used by replicate. +sub conn_get { + my ($self, $ip, $port) = @_; + my $conn = $self->_conn_idle_get($ip, $port); + + if ($conn) { + # in case the connection never comes back, let refcounting close() it: + delete $self->{fdmap}->{$conn->fd}; + } else { + $conn = $self->_conn_new($ip, $port); + unless (ref $conn) { + $! = $conn; # $conn is an error message :< + return; + } + delete $self->{fdmap}->{$conn->fd}; + my $timeout = MogileFS->config("node_timeout"); + MogileFS::Util::wait_for_writeability($conn->fd, $timeout) or return; + } + + return $conn; +} + +# retrieves a connection from the connection pool and executes +# inflight_cb on it. If the pool is at capacity, this will queue the task. 
+# This relies on Danga::Socket->EventLoop +sub start { + my ($self, $ip, $port, $inflight_cb) = @_; + + my $conn = $self->_conn_get($ip, $port); + if ($conn) { + $self->_conn_run($conn, $inflight_cb); + } else { # we're too busy right now, queue up + $self->enqueue($ip, $port, $inflight_cb); + } +} + +# returns the total number of connections we have +sub _total_connections { + my ($self) = @_; + return scalar keys %{$self->{fdmap}}; +} + +# marks a connection as no longer inflight, returns the inflight +# callback if the connection was active, undef if not +sub inflight_cb_expire { + my ($self, $conn) = @_; + my $inflight_cb = delete $self->{inflight}->{$conn->key}->{$conn->fd}; + $self->{total_inflight}-- if $inflight_cb; + + return $inflight_cb; +} + +# schedules the event loop to dequeue and run a task on the next +# tick of the Danga::Socket event loop. Call this +# 1) whenever a task is enqueued +# 2) whenever a task is complete +sub schedule_queued { + my ($self) = @_; + + # AddTimer(0) to avoid potential stack overflow + $self->{scheduled} ||= Danga::Socket->AddTimer(0, sub { + $self->{scheduled} = undef; + my $queue = $self->{queue}; + + my $total_capacity = $self->{total_capacity}; + my $i = 0; + + while ($self->{total_inflight} < $total_capacity + && $i <= (scalar(@$queue) - 1)) { + my ($ip, $port, $cb) = @{$queue->[$i]}; + + my $conn = $self->_conn_get($ip, $port); + if ($conn) { + splice(@$queue, $i, 1); # remove from queue + $self->_conn_run($conn, $cb); + } else { + # this queue object cannot be dequeued, skip it for now + $i++; + } + } + }); +} + +# Call this when done using an (inflight) connection +# This possibly places a connection in the connection pool. +# This will close the connection of the pool is already at capacity. 
+# This will also start the next queued callback, or retry if needed +sub conn_persist { + my ($self, $conn) = @_; + + # schedule the next request if we're done with any connection + $self->schedule_queued; + $self->conn_put($conn); +} + +# The opposite of conn_get, this returns a connection retrieved with conn_get +# back to the connection pool, making it available for future use. Dead +# connections are not stored. +# This is currently only used by replicate. +sub conn_put { + my ($self, $conn) = @_; + + my $key = $conn->key; + # we do not store dead connections + my $peer_addr = $conn->peer_addr_string; + + if ($peer_addr) { + # connection is still alive, respect capacity limits + my $idle = $self->{idle}->{$key} ||= []; + + # register it in the fdmap just in case: + $self->{fdmap}->{$conn->fd} = $conn; + + if ($self->_dest_total($conn) < $self->{dest_capacity}) { + $conn->mark_idle; + push @$idle, $conn; # yay, connection is reusable + $conn->set_timeout(undef); # clear timeout + return 1; # success + } + } + + # we have too many connections or the socket is dead, caller + # should close after returning from this function. 
+ return 0; +} + +# enqueues a request (inflight_cb) and schedules it to run ASAP +# This must be used with Danga::Socket->EventLoop +sub enqueue { + my ($self, $ip, $port, $inflight_cb) = @_; + + push @{$self->{queue}}, [ $ip, $port, $inflight_cb ]; + + # we have something in the queue, make sure it's run soon + $self->schedule_queued; +} + +# returns the total connections to the host of a given connection +sub _dest_total { + my ($self, $conn) = @_; + my $key = $conn->key; + my $inflight = scalar keys %{$self->{inflight}->{$key}}; + my $idle = scalar @{$self->{idle}->{$key}}; + return $idle + $inflight; +} + +# only call this from the event_hup/event_err callbacks used by Danga::Socket +sub conn_drop { + my ($self, $conn, $close_reason) = @_; + + my $fd = $conn->fd; + my $key = $conn->key; + + # event_read must handle errors anyways, so hand off + # error handling to the event_read callback if inflight. + return $conn->event_read if $self->{inflight}->{$key}->{$fd}; + + # we get here if and only if the socket is idle, we can drop it ourselves + # splice out the socket we're closing from the idle pool + my $idle = $self->{idle}->{$key}; + foreach my $i (0 .. (scalar(@$idle) - 1)) { + my $old = $idle->[$i]; + if ($old->sock) { + if ($old->fd == $fd) { + splice(@$idle, $i, 1); + $conn->close($close_reason); + return; + } + } else { + # some connections may have expired but not been spliced out, yet + # splice it out here since we're iterating anyways + splice(@$idle, $i, 1); + } + } +} + +# unregisters and prepares connection to be closed +# Returns the inflight callback if there was one +sub conn_close_prepare { + my ($self, $conn, $close_reason) = @_; + + if ($conn->sock) { + my $fd = $conn->fd; + + my $valid = delete $self->{fdmap}->{$fd}; + delete $self->{timeouts}->{$fd}; + + my $inflight_cb = $self->inflight_cb_expire($conn); + + # $valid may be undef in replicate worker which removes connections + # from fdmap. 
However, valid==undef connections should never have + # an inflight_cb + if ($inflight_cb && !$valid) { + croak("BUG: dropping unregistered conn with callback: $conn"); + } + return $inflight_cb; + } +} + +# schedules cb to run on the next tick of the event loop, +# (immediately after this tick runs) +sub on_next_tick { + my ($self, $cb) = @_; + my $on_next_tick = $self->{on_next_tick}; + push @$on_next_tick, $cb; + + if (scalar(@$on_next_tick) == 1) { + Danga::Socket->AddTimer(0, sub { + # prevent scheduled callbacks from being called on _this_ tick + $on_next_tick = $self->{on_next_tick}; + $self->{on_next_tick} = []; + + while (my $sub = shift @$on_next_tick) { + $sub->() + } + }); + } +} + +# marks a connection inflight and invokes cb on it +# $conn may be a error string, in which case we'll invoke the user-supplied +# callback with a mock error (this mimics how LWP fakes an HTTP response +# even if the socket could not be created/connected) +sub _conn_run { + my ($self, $conn, $cb) = @_; + + if (ref $conn) { + my $inflight = $self->{inflight}->{$conn->key} ||= {}; + $inflight->{$conn->fd} = $cb; # stash callback for retrying + $self->{total_inflight}++; + $cb->($conn); + } else { + # fake an error message on the response callback + $self->on_next_tick(sub { + # fatal error creating the socket, do not queue + my $mfs_err = $conn; + $self->{class}->new_err($mfs_err, $cb); + + # onto the next request + $self->schedule_queued; + }); + } +} + +# drops an idle connection from the idle connection pool (so we can open +# another socket without incurring out-of-FD errors) +# Only call when you're certain there's a connection to drop +# XXX This is O(destinations), unfortunately +sub _conn_drop_idle { + my ($self) = @_; + my $idle = $self->{idle}; + + foreach my $val (values %$idle) { + my $conn = shift @$val or next; + + $conn->close("idle_expire") if $conn->sock; + return; + } + + confess("BUG: unable to drop an idle connection"); +} + +# checks for expired 
connections, this can be expensive if there +# are many concurrent connections waiting on timeouts, but still +# better than having AddTimer create a Danga::Socket::Timer object +# every time a timeout is reset. +sub check_timeouts { + my ($self) = @_; + my $timeouts = $self->{timeouts}; + my @fds = keys %$timeouts; + my $next_expiry = NEVER; + my $now = Time::HiRes::time(); + + # this is O(n) where n is concurrent connections + foreach my $fd (@fds) { + my $conn = $timeouts->{$fd}; + if ($conn->expired($now)) { + delete $timeouts->{$fd}; + } else { + # look for the next timeout + my $expiry = $conn->expiry; + if ($expiry) { + $next_expiry = $expiry if $expiry < $next_expiry; + } else { + # just in case, this may not happen... + delete $timeouts->{$fd}; + } + } + } + + # schedule the wakeup for the next timeout + if ($next_expiry == NEVER) { + $self->{timer} = undef; + } else { + my $timeout = $next_expiry - $now; + $timeout = 0 if $timeout <= 0; + $self->{timer} = Danga::Socket->AddTimer($timeout, sub { + $self->check_timeouts; + }); + } + $self->{next_expiry} = $next_expiry; +} + +# registers a timeout for a given connection, each connection may only +# have one pending timeout. Timeout may be undef to cancel the current +# timeout. +sub register_timeout { + my ($self, $conn, $timeout) = @_; + + if ($conn->sock) { + my $fd = $conn->fd; + if ($timeout) { + $self->{timeouts}->{$fd} = $conn; + my $next_expiry = $self->{next_expiry}; + my $old_timer = $self->{timer}; + my $expiry = $timeout + Time::HiRes::time(); + + if (!$old_timer || $expiry < $next_expiry) { + $self->{next_expiry} = $expiry; + $self->{timer} = Danga::Socket->AddTimer($timeout, sub { + $self->check_timeouts; + }); + $old_timer->cancel if $old_timer; + } + } else { + delete $self->{timeouts}->{$fd}; + } + } elsif ($timeout) { # this may never happen... 
+ # no FD, so we must allocate a new Danga::Socket::Timer object + # add 1msec to avoid FP rounding problems leading to missed + # expiration when calling conn->expired + Danga::Socket->AddTimer($timeout + 0.001, sub { $conn->expired }); + } +} + +1; diff --git a/lib/MogileFS/DevFID.pm b/lib/MogileFS/DevFID.pm index 6211a905..52bffc19 100644 --- a/lib/MogileFS/DevFID.pm +++ b/lib/MogileFS/DevFID.pm @@ -1,6 +1,7 @@ package MogileFS::DevFID; use strict; use warnings; +use MogileFS::Server; use overload '""' => \&as_string; use Carp qw(croak); @@ -25,7 +26,7 @@ sub as_string { sub device { my $self = shift; - return $self->{dev} ||= MogileFS::Device->of_devid($self->{devid}); + return $self->{dev} ||= Mgd::device_factory()->get_by_id($self->{devid}); } sub fid { @@ -51,20 +52,45 @@ sub get_url { } sub vivify_directories { - my $self = shift; + my ($self, $cb) = @_; my $url = $self->url; - MogileFS::Device->vivify_directories($url); + $self->device()->vivify_directories($url, $cb); } # returns 0 on missing, # undef on connectivity error, # else size of file on disk (after HTTP HEAD or mogstored stat) +# invokes $cb on the size if $cb is supplied (and Danga::Socket->EventLoop runs) sub size_on_disk { - my $self = shift; + my ($self, $cb) = @_; + + if ($self->device->should_read_from) { + my $url = $self->get_url; + my $httpfile = $self->{_httpfile_get} ||= MogileFS::HTTPFile->at($url); + + # check that it has size (>0) and is reachable (not undef) + return $httpfile->size($cb); + } else { + # monitor says we cannot read from this device, so do not try + + Danga::Socket->AddTimer(0, sub { $cb->(undef) }) if $cb; + return undef; + } +} + +# returns -1 on missing, +# undef on connectivity error, +# else checksum of file on disk (after HTTP GET or mogstored read) +sub checksum_on_disk { + my ($self, $alg, $ping_cb, $reason) = @_; + + return undef unless $self->device->should_read_from; + my $url = $self->get_url; + my $httpfile = $self->{_httpfile_get} ||= 
MogileFS::HTTPFile->at($url); # check that it has size (>0) and is reachable (not undef) - return MogileFS::HTTPFile->at($url)->size; + return $httpfile->digest($alg, $ping_cb, $reason); } # returns true if size seen matches fid's length @@ -105,7 +131,6 @@ sub _make_full_url { # get some information we'll need my $dev = $self->device or return undef; my $host = $dev->host or return undef; - return undef unless $host->exists; my $path = $self->uri_path; my $hostip = $host->ip; @@ -121,6 +146,9 @@ sub add_to_db { my $sto = Mgd::get_store(); if ($sto->add_fidid_to_devid($self->{fidid}, $self->{devid})) { + if (my $memc = MogileFS::Config->memcache_client) { + $memc->delete("mogdevids:$self->{fidid}"); + } return $self->fid->update_devcount(no_lock => $no_lock); } else { # was already on that device @@ -148,7 +176,7 @@ sub destroy { my $sto = Mgd::get_store(); $sto->remove_fidid_from_devid($self->fidid, $self->devid); - $sto->update_devcount($self->fidid); + $self->fid->update_devcount(no_lock => 1); } 1; diff --git a/lib/MogileFS/Device.pm b/lib/MogileFS/Device.pm index d0f82f74..0b0e58e2 100644 --- a/lib/MogileFS/Device.pm +++ b/lib/MogileFS/Device.pm @@ -1,273 +1,119 @@ package MogileFS::Device; use strict; use warnings; -use Carp qw(croak); -use MogileFS::Config qw(DEVICE_SUMMARY_CACHE_TIMEOUT); +use Carp qw/croak/; +use MogileFS::Server; +use MogileFS::Util qw(throw); use MogileFS::Util qw(okay_args device_state error); +=head1 + +MogileFS::Device - device class + +=cut + BEGIN { my $testing = $ENV{TESTING} ? 1 : 0; eval "sub TESTING () { $testing }"; } -my %singleton; # devid -> instance -my $last_load = 0; # unixtime we last reloaded devices from database -my $all_loaded = 0; # bool: have we loaded all the devices? - -# throws "dup" on duplicate devid. returns new MogileFS::Device object on success. 
-# %args include devid, hostid, and status (in (alive, down, readonly)) -sub create { - my ($pkg, %args) = @_; - okay_args(\%args, qw(devid hostid status)); - my $devid = Mgd::get_store()->create_device(@args{qw(devid hostid status)}); - MogileFS::Device->invalidate_cache; - return $pkg->of_devid($devid); -} +my @observed_fields = qw/observed_state utilization reject_bad_md5/; +my @fields = (qw/hostid status weight mb_total mb_used mb_asof devid/, + @observed_fields); -sub of_devid { - my ($class, $devid) = @_; - croak("Invalid devid") unless $devid; - return $singleton{$devid} ||= bless { - devid => $devid, - no_mkcol => 0, - _loaded => 0, +sub new_from_args { + my ($class, $args, $host_factory) = @_; + my $self = bless { + host_factory => $host_factory, + %{$args}, }, $class; -} - -sub t_wipe_singletons { - %singleton = (); - $last_load = time(); # fake it -} - -sub t_init { - my ($self, $hostid, $state) = @_; - $self->{_loaded} = 1; - - my $dstate = device_state($state) or - die "Bogus state"; - - $self->{hostid} = $hostid; - $self->{status} = $state; - $self->{observed_state} = "writeable"; - - # say it's 10% full, of 1GB - $self->{mb_total} = 1000; - $self->{mb_used} = 100; -} - -sub from_devid_and_hostname { - my ($class, $devid, $hostname) = @_; - my $dev = MogileFS::Device->of_devid($devid) - or return undef; - return undef unless $dev->exists; - my $host = $dev->host; - return undef - unless $host && $host->exists && $host->hostname eq $hostname; - return $dev; -} - -sub vivify_directories { - my ($class, $path) = @_; - - # $path is something like: - # http://10.0.0.26:7500/dev2/0/000/148/0000148056.fid - - # three directories we'll want to make: - # http://10.0.0.26:7500/dev2/0 - # http://10.0.0.26:7500/dev2/0/000 - # http://10.0.0.26:7500/dev2/0/000/148 - - croak "non-HTTP mode no longer supported" unless $path =~ /^http/; - return 0 unless $path =~ m!/dev(\d+)/(\d+)/(\d\d\d)/(\d\d\d)/\d+\.fid$!; - my ($devid, $p1, $p2, $p3) = ($1, $2, $3, $4); - - my 
$dev = MogileFS::Device->of_devid($devid); - return 0 unless $dev->exists; - - $dev->create_directory("/dev$devid/$p1"); - $dev->create_directory("/dev$devid/$p1/$p2"); - $dev->create_directory("/dev$devid/$p1/$p2/$p3"); -} - -# returns array of all MogileFS::Device objects -sub devices { - my $class = shift; - MogileFS::Device->check_cache; - return values %singleton; -} -# returns hashref of devid -> $device_obj -# you're allowed to mess with this returned hashref -sub map { - my $class = shift; - my $ret = {}; - foreach my $d (MogileFS::Device->devices) { - $ret->{$d->id} = $d; - } - return $ret; -} + # FIXME: No guarantee (as of now?) that hosts get loaded before devs. + #$self->host || die "No host for $self->{devid} (host $self->{hostid})"; -sub reload_devices { - my $class = shift; - - # mark them all invalid for now, until they're reloaded - foreach my $dev (values %singleton) { - $dev->{_loaded} = 0; - } - - MogileFS::Host->check_cache; - - my $sto = Mgd::get_store(); - foreach my $row ($sto->get_all_devices) { - my $dev = - MogileFS::Device->of_devid($row->{devid}); - $dev->absorb_dbrow($row); - } - - # get rid of ones that could've gone away: - foreach my $devid (keys %singleton) { - my $dev = $singleton{$devid}; - delete $singleton{$devid} unless $dev->{_loaded} - } - - $all_loaded = 1; - $last_load = time(); -} - -sub invalidate_cache { - my $class = shift; - - # so next time it's invalid and won't be used old - $last_load = 0; - $all_loaded = 0; - $_->{_loaded} = 0 foreach values %singleton; - - if (my $worker = MogileFS::ProcManager->is_child) { - $worker->invalidate_meta("device"); - } -} + croak "invalid device observed state '$self->{observed_state}', valid: writeable, readable, unreachable" + if $self->{observed_state} && $self->{observed_state} !~ /^(?:writeable|readable|unreachable)$/; -sub check_cache { - my $class = shift; - my $now = $Mgd::nowish || time(); - return if $last_load > $now - DEVICE_SUMMARY_CACHE_TIMEOUT; - 
MogileFS::Device->reload_devices; + return $self; } -# -------------------------------------------------------------------------- - -sub devid { return $_[0]{devid} } -sub id { return $_[0]{devid} } +# Instance methods -sub absorb_dbrow { - my ($dev, $hashref) = @_; - foreach my $k (qw(hostid mb_total mb_used mb_asof status weight)) { - $dev->{$k} = $hashref->{$k}; - } - - $dev->{$_} ||= 0 foreach qw(mb_total mb_used mb_asof); - - my $host = MogileFS::Host->of_hostid($dev->{hostid}); - if ($host && $host->exists) { - my $host_status = $host->status; - die "No status" unless $host_status =~ /^\w+$/; - # FIXME: not sure I like this, changing the in-memory version - # of the configured status is. I'd rather this be calculated - # in an accessor. - if ($dev->{status} eq 'alive' && $host_status ne 'alive') { - $dev->{status} = "down" - } - } else { - if ($dev->{status} eq "dead") { - # ignore dead devices without hosts. not a big deal. - } else { - die "No host for dev $dev->{devid} (host $dev->{hostid})"; - } - } +sub id { return $_[0]{devid} } +sub devid { return $_[0]{devid} } +sub name { return $_[0]{devid} } +sub status { return $_[0]{status} } +sub weight { return $_[0]{weight} } +sub hostid { return $_[0]{hostid} } - $dev->{_loaded} = 1; +sub host { + my $self = shift; + return $self->{host_factory}->get_by_id($self->{hostid}); } # returns 0 if not known, else [0,1] sub percent_free { - my $dev = shift; - $dev->_load; - return 0 unless $dev->{mb_total} && defined $dev->{mb_used}; - return 1 - ($dev->{mb_used} / $dev->{mb_total}); + my $self = shift; + return 0 unless $self->{mb_total} && defined $self->{mb_used}; + return 1 - ($self->{mb_used} / $self->{mb_total}); } # returns undef if not known, else [0,1] sub percent_full { - my $dev = shift; - $dev->_load; - return undef unless $dev->{mb_total} && defined $dev->{mb_used}; - return $dev->{mb_used} / $dev->{mb_total}; + my $self = shift; + return undef unless $self->{mb_total} && defined $self->{mb_used}; + 
return $self->{mb_used} / $self->{mb_total}; } -our $util_no_broadcast = 0; - -sub set_observed_utilization { - my ($dev, $util) = @_; - $dev->{utilization} = $util; - my $devid = $dev->id; - - return if $util_no_broadcast; +# FIXME: $self->mb_free? +sub fields { + my $self = shift; + my @tofetch = @_ ? @_ : @fields; + my $ret = { (map { $_ => $self->{$_} } @tofetch), + 'mb_free' => $self->mb_free }; + return $ret; +} - my $worker = MogileFS::ProcManager->is_child or return; - $worker->send_to_parent(":set_dev_utilization $devid $util"); +sub observed_fields { + return $_[0]->fields(@observed_fields); } sub observed_utilization { - my ($dev) = @_; + my $self = shift; if (TESTING) { - my $weight_varname = 'T_FAKE_IO_DEV' . $dev->id; + my $weight_varname = 'T_FAKE_IO_DEV' . $self->id; return $ENV{$weight_varname} if defined $ENV{$weight_varname}; } - return $dev->{utilization}; + return $self->{utilization}; } -sub set_observed_state { - my ($dev, $state) = @_; - croak "set_observed_state() with invalid device state '$state', valid: writeable, readable, unreachable" - if $state !~ /^(?:writeable|readable|unreachable)$/; - $dev->{observed_state} = $state; +sub host_ok { + my $host = $_[0]->host; + return ($host && $host->observed_reachable); } sub observed_writeable { - my $dev = shift; - return 0 unless $dev->{observed_state} && $dev->{observed_state} eq "writeable"; - my $host = $dev->host - or return 0; - return 0 unless $host->observed_reachable; - return 1; + my $self = shift; + return 0 unless $self->host_ok; + return $self->{observed_state} && $self->{observed_state} eq 'writeable'; } sub observed_readable { - my $dev = shift; - return $dev->{observed_state} && $dev->{observed_state} eq "readable"; + my $self = shift; + return 0 unless $self->host_ok; + return $self->{observed_state} && $self->{observed_state} eq 'readable'; } sub observed_unreachable { - my $dev = shift; - return $dev->{observed_state} && $dev->{observed_state} eq "unreachable"; -} - -# 
returns status as a string (SEE ALSO: dstate, returns DeviceState object, -# which knows the traits/capabilities of that named state) -sub status { - my $dev = shift; - $dev->_load; - return $dev->{status}; -} - -sub weight { - my $dev = shift; - $dev->_load; - return $dev->{weight}; + my $self = shift; + # host is unreachability implies device unreachability + return 1 unless $self->host_ok; + return $self->{observed_state} && $self->{observed_state} eq 'unreachable'; } +# FIXME: This pattern is weird. Store the object on new? sub dstate { my $ds = device_state($_[0]->status); return $ds if $ds; @@ -276,34 +122,40 @@ sub dstate { } sub can_delete_from { - my $self = shift; - return $self->dstate->can_delete_from; + return $_[0]->host->alive && $_[0]->dstate->can_delete_from; } +# this method is for Monitor, other workers should use should_read_from sub can_read_from { - my $self = shift; - return $self->dstate->can_read_from; + return $_[0]->host->should_read_from && $_[0]->dstate->can_read_from; +} + +# this is the only method a worker should call for checking for readability +sub should_read_from { + return $_[0]->can_read_from && ($_[0]->observed_readable || $_[0]->observed_writeable); } +# FIXME: Is there a (unrelated to this code) bug where new files aren't tested +# against the free space limit before being stored or replicated somewhere? sub should_get_new_files { - my $dev = shift; - my $dstate = $dev->dstate; + my $self = shift; + my $dstate = $self->dstate; return 0 unless $dstate->should_get_new_files; - return 0 unless $dev->observed_writeable; - return 0 unless $dev->host->should_get_new_files; - + return 0 unless $self->observed_writeable; + return 0 unless $self->host->alive; # have enough disk space? 
(default: 100MB) my $min_free = MogileFS->config("min_free_space"); - return 0 if $dev->{mb_total} && - $dev->mb_free < $min_free; + return 0 if $self->{mb_total} && + $self->mb_free < $min_free; return 1; } sub mb_free { my $self = shift; - return $self->{mb_total} - $self->{mb_used}; + return $self->{mb_total} - $self->{mb_used} + if $self->{mb_total} && defined $self->{mb_used}; } sub mb_used { @@ -312,90 +164,78 @@ sub mb_used { # currently the same policy, but leaving it open for differences later. sub should_get_replicated_files { - my $dev = shift; - return $dev->should_get_new_files; + return $_[0]->should_get_new_files; } sub not_on_hosts { - my ($dev, @hosts) = @_; - my @hostids = map { ref($_) ? $_->hostid : $_ } @hosts; - my $my_hostid = $dev->hostid; + my ($self, @hosts) = @_; + my @hostids = map { ref($_) ? $_->id : $_ } @hosts; + my $my_hostid = $self->id; return (grep { $my_hostid == $_ } @hostids) ? 0 : 1; } -sub exists { - my $dev = shift; - $dev->_try_load; - return $dev->{_loaded}; -} - -sub host { - my $dev = shift; - return MogileFS::Host->of_hostid($dev->hostid); -} - -sub hostid { - my $dev = shift; - $dev->_load; - return $dev->{hostid}; -} - +# "cached" by nature of the monitor worker testing this. sub doesnt_know_mkcol { - my $self = shift; - # TODO: forget this periodically? maybe whenever host/device is observed down? - # in case webserver changes. - return $self->{no_mkcol}; + return $_[0]->{no_mkcol}; } +# Gross class-based singleton cache. my %dir_made; # /dev/path -> $time my $dir_made_lastclean = 0; -# returns 1 on success, 0 on failure + +# if no callback is supplied: returns 1 on success, 0 on failure +# if a callback is supplied, the return value will be passed to the callback +# upon completion. sub create_directory { - my ($self, $uri) = @_; - return 1 if $self->doesnt_know_mkcol; + my ($self, $uri, $cb) = @_; + if ($self->doesnt_know_mkcol || MogileFS::Config->server_setting_cached('skip_mkcol')) { + return $cb ? 
$cb->(1) : 1; + } # rfc2518 says we "should" use a trailing slash. Some servers # (nginx) appears to require it. - $uri .= '/' unless $uri =~ m!/$!; - - return 1 if $dir_made{$uri}; + $uri .= '/' unless $uri =~ m/\/$/; - my $hostid = $self->hostid; - my $host = $self->host; - my $hostip = $host->ip or return 0; - my $port = $host->http_port or return 0; - my $peer = "$hostip:$port"; - - my $sock = IO::Socket::INET->new(PeerAddr => $peer, Timeout => 1) - or return 0; - - print $sock "MKCOL $uri HTTP/1.0\r\n". - "Content-Length: 0\r\n\r\n"; - - my $ans = <$sock>; - - # if they don't support this method, remember that - if ($ans && $ans =~ m!HTTP/1\.[01] (400|501)!) { - $self->{no_mkcol} = 1; - # TODO: move this into method on device, which propagates to parent - # and also receive from parent. so all query workers share this knowledge - return 1; + if ($dir_made{$uri}) { + return $cb ? $cb->(1) : 1; } - return 0 unless $ans && $ans =~ m!^HTTP/1.[01] 2\d\d!; + my $res; + my $on_mkcol_response = sub { + if ($res->is_success) { + my $now = time(); + $dir_made{$uri} = $now; + + # cleanup %dir_made occasionally. + my $clean_interval = 300; # every 5 minutes. + if ($dir_made_lastclean < $now - $clean_interval) { + $dir_made_lastclean = $now; + foreach my $k (keys %dir_made) { + delete $dir_made{$k} if $dir_made{$k} < $now - 3600; + } + } + return 1; + } elsif ($res->code =~ /\A(?:400|501)\z/) { + # if they don't support this method, remember that + # TODO: move this into method in *monitor* worker + $self->{no_mkcol} = 1; + return 1; + } else { + return 0; + } + }; - my $now = time(); - $dir_made{$uri} = $now; + my %opts = ( headers => { "Content-Length" => "0" } ); + $self->host->http("MKCOL", $uri, \%opts, sub { + ($res) = @_; + $cb->($on_mkcol_response->()) if $cb; + }); - # cleanup %dir_made occasionally. - my $clean_interval = 300; # every 5 minutes. 
- if ($dir_made_lastclean < $now - $clean_interval) { - $dir_made_lastclean = $now; - foreach my $k (keys %dir_made) { - delete $dir_made{$k} if $dir_made{$k} < $now - 3600; - } - } - return 1; + return if $cb; + + Danga::Socket->SetPostLoopCallback(sub { !defined $res }); + Danga::Socket->EventLoop; + return $on_mkcol_response->(); } sub fid_list { @@ -423,53 +263,19 @@ sub fid_chunks { } sub forget_about { - my ($dev, $fid) = @_; - Mgd::get_store()->remove_fidid_from_devid($fid->id, $dev->id); + my ($self, $fid) = @_; + Mgd::get_store()->remove_fidid_from_devid($fid->id, $self->id); return 1; } sub usage_url { - my $dev = shift; - my $host = $dev->host; + my $self = shift; + my $host = $self->host; my $get_port = $host->http_get_port; my $hostip = $host->ip; - return "http://$hostip:$get_port/dev$dev->{devid}/usage"; + return "http://$hostip:$get_port/dev$self->{devid}/usage"; } -sub overview_hashref { - my $dev = shift; - $dev->_load; - - my $ret = {}; - foreach my $k (qw(devid hostid status weight observed_state - mb_total mb_used mb_asof utilization)) { - $ret->{$k} = $dev->{$k}; - } - $ret->{mb_free} = $dev->mb_free; - return $ret; -} - -sub set_weight { - my ($dev, $weight) = @_; - my $sto = Mgd::get_store(); - $sto->set_device_weight($dev->id, $weight); - MogileFS::Device->invalidate_cache; -} - -sub set_state { - my ($dev, $state) = @_; - my $dstate = device_state($state) or - die "Bogus state"; - my $sto = Mgd::get_store(); - $sto->set_device_state($dev->id, $state); - MogileFS::Device->invalidate_cache; - - # wake a reaper process up from sleep to get started as soon as possible - # on re-replication - MogileFS::ProcManager->wake_a("reaper") if $dstate->should_wake_reaper; -} - -# given the current state, can this device transition into the provided $newstate? sub can_change_to_state { my ($self, $newstate) = @_; # don't allow dead -> alive transitions. 
(yes, still possible @@ -479,19 +285,47 @@ sub can_change_to_state { return 1; } -# -------------------------------------------------------------------------- +sub vivify_directories { + my ($self, $path, $cb) = @_; + + # $path is something like: + # http://10.0.0.26:7500/dev2/0/000/148/0000148056.fid -sub _load { - return if $_[0]{_loaded}; - MogileFS::Device->reload_devices; - return if $_[0]{_loaded}; - my $dev = shift; - croak "Device $dev->{devid} doesn't exist.\n"; + # three directories we'll want to make: + # http://10.0.0.26:7500/dev2/0 + # http://10.0.0.26:7500/dev2/0/000 + # http://10.0.0.26:7500/dev2/0/000/148 + + croak "non-HTTP mode no longer supported" unless $path =~ /^http/; + return 0 unless $path =~ m!/dev(\d+)/(\d+)/(\d\d\d)/(\d\d\d)/\d+\.fid$!; + my ($devid, $p1, $p2, $p3) = ($1, $2, $3, $4); + + die "devid mismatch" unless $self->id == $devid; + + if ($cb) { + $self->create_directory("/dev$devid/$p1", sub { + $self->create_directory("/dev$devid/$p1/$p2", sub { + $self->create_directory("/dev$devid/$p1/$p2/$p3", $cb); + }); + }); + } else { + $self->create_directory("/dev$devid/$p1"); + $self->create_directory("/dev$devid/$p1/$p2"); + $self->create_directory("/dev$devid/$p1/$p2/$p3"); + } +} + +# Compatibility interface since this old routine is unfortunately called +# internally within plugins. This data should be passed into any hooks which +# may need it? +# Currently an issue with MogileFS::Network + ZoneLocal +# Remove this in 2012. 
+sub devices { + return Mgd::device_factory()->get_all; } -sub _try_load { - return if $_[0]{_loaded}; - MogileFS::Device->reload_devices; +sub reject_bad_md5 { + return $_[0]->{reject_bad_md5}; } 1; diff --git a/lib/MogileFS/DeviceState.pm b/lib/MogileFS/DeviceState.pm index 6ac6add7..0b052a2a 100644 --- a/lib/MogileFS/DeviceState.pm +++ b/lib/MogileFS/DeviceState.pm @@ -44,7 +44,7 @@ sub can_delete_from { $_[0]->{write} } sub can_read_from { $_[0]->{read} } sub should_get_new_files { $_[0]->{new_files} } sub should_get_repl_files { $_[0]->{new_files} } -sub should_have_files { ! ($_[0]->{drain} || $_[0]->{dead}) } +sub should_have_files { ! $_[0]->{dead} } sub should_monitor { $_[0]->{monitor} } # named inconveniently so it's not taken to mean equalling string diff --git a/lib/MogileFS/Domain.pm b/lib/MogileFS/Domain.pm index 2130d6ea..a1464ebc 100644 --- a/lib/MogileFS/Domain.pm +++ b/lib/MogileFS/Domain.pm @@ -1,123 +1,27 @@ package MogileFS::Domain; use strict; use warnings; +use MogileFS::Server; use MogileFS::Util qw(throw); -# -------------------------------------------------------------------------- -# Class methods: -# -------------------------------------------------------------------------- +=head1 -my %singleton; # dmid -> MogileFS::Domain +MogileFS::Domain - domain class. 
-my %id2name; # dmid -> domainname(namespace) -my %name2id; # domainname(namespace) -> dmid +=cut -my $last_load = 0; - -# return singleton MogileFS::Domain, given a dmid -sub of_dmid { - my ($pkg, $dmid) = @_; - return undef unless $dmid; - return $singleton{$dmid} if $singleton{$dmid}; - - my $ns = $pkg->name_of_id($dmid) - or return undef; - - return $singleton{$dmid} = bless { - dmid => $dmid, - ns => $ns, - }, $pkg; -} - -# return singleton MogileFS::Domain, given a domain(namespace) -sub of_namespace { - my ($pkg, $ns) = @_; - return undef unless $ns; - my $dmid = $pkg->id_of_name($ns) - or return undef; - return MogileFS::Domain->of_dmid($dmid); -} - -# name to dmid, reloading if not in cache -sub id_of_name { - my ($pkg, $domain) = @_; - return $name2id{$domain} if $name2id{$domain}; - $pkg->reload_domains; - return $name2id{$domain}; -} - -# dmid to name, reloading if not in cache -sub name_of_id { - my ($pkg, $dmid) = @_; - return $id2name{$dmid} if $id2name{$dmid}; - $pkg->reload_domains; - return $id2name{$dmid}; -} - -# force reload of cache -sub reload_domains { - my $now = time(); - my $sto = Mgd::get_store(); - %name2id = $sto->get_all_domains; - %id2name = (); - while (my ($k, $v) = each %name2id) { - $id2name{$v} = $k; - } - - # Blow singleton cache on reload. Otherwise a *change* in data may not be - # reflected. 
- %singleton = (); - - $last_load = $now; +sub new_from_args { + my ($class, $args, $class_factory) = @_; + return bless { + class_factory => $class_factory, + %{$args}, + }, $class; } -# FIXME: should probably have an invalidate_cache variant that only -# flushes locally (for things like "get_domains" or "get_hosts", where -# it needs to be locally correct for the semantics of the command, but -# no need to propagate a cache invalidation to our peers) -sub invalidate_cache { - $last_load = 0; - %id2name = (); - %name2id = (); - if (my $worker = MogileFS::ProcManager->is_child) { - $worker->invalidate_meta("domain"); - } -} - -sub check_cache { - my $pkg = shift; - my $now = time(); - return if $last_load > $now - 5; - MogileFS::Domain->reload_domains; -} - -sub domains { - my $pkg = shift; - $pkg->check_cache; - return map { $pkg->of_dmid($_) } keys %id2name; -} - -# create a new domain given a name, returns MogileFS::Domain object on success. -# throws errors on failure. error codes include: -# "dup" -- on duplicate name -sub create { - my ($pkg, $name) = @_; - - # throws 'dup': - my $dmid = Mgd::get_store()->create_domain($name) - or die "create domain didn't return a dmid"; - - # return the domain id we created - MogileFS::Domain->invalidate_cache; - return MogileFS::Domain->of_dmid($dmid); -} - -# -------------------------------------------------------------------------- # Instance methods: -# -------------------------------------------------------------------------- -sub id { $_[0]->{dmid} } -sub name { $_[0]->{ns} } +sub id { $_[0]{dmid} } +sub name { $_[0]{namespace} } sub has_files { my $self = shift; @@ -126,34 +30,17 @@ sub has_files { } sub classes { - my $dom = shift; - # return a bunch of class objects for this domain - return MogileFS::Class->classes_of_domain($dom); -} - -# returns true if deleted. throws exceptions on errors. exception codes: -# 'has_files' if it has files. 
-sub delete { my $self = shift; - throw("has_files") if $self->has_files; - # TODO: delete its classes - my $rv = Mgd::get_store()->delete_domain($self->id); - MogileFS::Domain->invalidate_cache; - return $rv; + return $self->{class_factory}->get_all($self); } -# returns named class of domain sub class { - my ($dom, $clname) = @_; - foreach my $cl (MogileFS::Class->classes_of_domain($dom)) { - return $cl if $cl->name eq $clname; - } - return; + my $self = shift; + return $self->{class_factory}->get_by_name($self, $_[0]); } -sub create_class { - my ($dom, $clname) = @_; - return MogileFS::Class->create_class($dom, $clname); +sub observed_fields { + return {}; } 1; diff --git a/lib/MogileFS/FID.pm b/lib/MogileFS/FID.pm index fb58b2c7..627c45e4 100644 --- a/lib/MogileFS/FID.pm +++ b/lib/MogileFS/FID.pm @@ -3,8 +3,14 @@ use strict; use warnings; use Carp qw(croak); use MogileFS::ReplicationRequest qw(rr_upgrade); +use MogileFS::Server; use overload '""' => \&as_string; +BEGIN { + my $testing = $ENV{TESTING} ? 1 : 0; + eval "sub TESTING () { $testing }"; +} + sub new { my ($class, $fidid) = @_; croak("Invalid fidid") unless $fidid; @@ -112,6 +118,8 @@ sub update_devcount { my $no_lock = delete $opts{no_lock}; croak "Bogus options" if %opts; + return 1 if MogileFS::Config->server_setting_cached('skip_devcount'); + my $fidid = $self->{fidid}; my $sto = Mgd::get_store(); @@ -138,19 +146,22 @@ sub enqueue_for_replication { my $from_dev = delete $opts{from_device}; # devid or Device object croak("Unknown options to enqueue_for_replication") if %opts; my $from_devid = (ref $from_dev ? 
$from_dev->id : $from_dev) || undef; + # Still schedule for the future, but don't delay long + $in = 1 if (TESTING && $in); Mgd::get_store()->enqueue_for_replication($self->id, $from_devid, $in); } -sub mark_unreachable { - my $self = shift; - # update database table - Mgd::get_store()->mark_fidid_unreachable($self->id); -} - sub delete { my $fid = shift; my $sto = Mgd::get_store(); + my $memc = MogileFS::Config->memcache_client; + if ($memc) { + $fid->_tryload; + } $sto->delete_fidid($fid->id); + if ($memc && $fid->{_loaded}) { + $memc->delete("mogfid:$fid->{dmid}:$fid->{dkey}"); + } } # returns 1 on success, 0 on duplicate key error, dies on exception @@ -177,7 +188,7 @@ sub devids { sub devs { my $self = shift; - return map { MogileFS::Device->of_devid($_) } $self->devids; + return map { Mgd::device_factory()->get_by_id($_) } $self->devids; } sub devfids { @@ -189,7 +200,7 @@ sub devfids { # return FID's class sub class { my $self = shift; - return MogileFS::Class->of_fid($self); + return Mgd::class_factory()->get_by_id($self->dmid, $self->classid); } # Get reloaded the next time we're bothered. @@ -207,15 +218,10 @@ sub devids_meet_policy { my $polobj = $cls->repl_policy_obj; - my $alldev = MogileFS::Device->map + my $alldev = Mgd::device_factory()->map_by_id or die "No global device map"; my @devs = $self->devs; - # This is a little heavy handed just to fix the 'devcount' cache, but - # doing it here ensures we get the error logged. 
- if (@devs != $self->devcount) { - return 0; - } my %rep_args = ( fid => $self->id, @@ -260,6 +266,14 @@ sub forget_about_device { return 1; } +# return an FID's checksum object, undef if it's missing +sub checksum { + my $self = shift; + my $row = Mgd::get_store()->get_checksum($self->{fidid}) or return undef; + + MogileFS::Checksum->new($row); +} + 1; __END__ diff --git a/lib/MogileFS/Factory.pm b/lib/MogileFS/Factory.pm new file mode 100644 index 00000000..bd7bd4c3 --- /dev/null +++ b/lib/MogileFS/Factory.pm @@ -0,0 +1,101 @@ +package MogileFS::Factory; +use strict; +use warnings; + +=head1 + +MogileFS::MogFactory - singleton class for holding some common objects. + +=head1 ABOUT + +This module holds a singleton for caching objects which are common but +relatively low in number. Such as devices, compared to fids. + +This singleton is to be maintained by the parent process, and inherited to +children during fork. Post-fork, the cache is updated by natural commands, or +a monitor process pushing changes through the parent. + +The purpose is to provide a fresh cache, without forcing new children to +wait for a monitoring run before becoming useful. It also should greatly +reduce the number of simple DB queries, as those should only happen +periodically directly from the monitor job. + +=cut + +my %singleton; + +# Rename to new maybe? +sub get_factory { + my $class = shift; + if (!exists $singleton{$class}) { + $singleton{$class} = bless { + by_id => {}, + by_name => {}, + }, $class; + } + return $singleton{$class}; +} + +# Allow unit tests to blow us up. +sub t_wipe { + my $class = shift; + delete $singleton{$class}; +} + +# because 'add' means bail if already exists. 
+sub set { + my $self = shift; + my $obj = shift; + $self->{by_id}->{$obj->id} = $obj; + $self->{by_name}->{$obj->name} = $obj; + return $obj; +} + +sub remove { + my $self = shift; + my $obj = shift; + + if (exists $self->{by_id}->{$obj->id}) { + delete $self->{by_id}->{$obj->id}; + delete $self->{by_name}->{$obj->name}; + } +} + +sub get_by_id { + my ($self, $id) = @_; + return $self->{by_id}->{$id}; +} + +sub get_by_name { + my ($self, $name) = @_; + return $self->{by_name}->{$name}; +} + +sub get_ids { + my $self = shift; + return keys %{$self->{by_id}}; +} + +sub get_names { + my $self = shift; + return keys %{$self->{by_name}}; +} + +sub get_all { + my $self = shift; + return values %{$self->{by_id}}; +} + +sub map_by_id { + my $self = shift; + my $set = $self->{by_id}; + return { map { $_ => $set->{$_} } keys %{$set} }; +} + +sub map_by_name { + my $self = shift; + my $set = $self->{by_name}; + return { map { $_ => $set->{$_} } keys %{$set} }; +} + +1; diff --git a/lib/MogileFS/Factory/Class.pm b/lib/MogileFS/Factory/Class.pm new file mode 100644 index 00000000..b78ce496 --- /dev/null +++ b/lib/MogileFS/Factory/Class.pm @@ -0,0 +1,95 @@ +package MogileFS::Factory::Class; +use strict; +use warnings; +use base 'MogileFS::Factory'; + +use MogileFS::Class; + +# This class is a reimplementation since classids and classnames +# are not globally unique... uses the same interface. +# Stupid/wasteful. +sub set { + my ($self, $args) = @_; + my $domain_factory = MogileFS::Factory::Domain->get_factory; + + my $class = MogileFS::Class->new_from_args($args, $domain_factory); + my $dmid = $class->dmid; + $self->{by_id}->{$dmid}->{$class->id} = $class; + $self->{by_name}->{$dmid}->{$class->name} = $class; + return $class; +} + +# Class factory is very awkward. Lets be very flexible in what we take; a +# domain object + id, a dmid, or a string with dmid-classid. +sub _find_ids { + my $self = shift; + my $dom = shift; + my $dmid = ref $dom ? 
$dom->id : $dom; + if ($dmid =~ m/^(\d+)-(\d+)$/) { + return $1, $2; + } + return $dmid, @_; +} + +# Example of what we could use for testing. +# Test creates the object, injects its own factory, then hands it to us. +sub set_from_obj { + my ($self, $obj) = @_; +} + +sub remove { + my $self = shift; + my $class = shift; + my $domid = $class->dmid; + my $clsid = $class->id; + if (exists $self->{by_id}->{$domid}->{$clsid}) { + delete $self->{by_id}->{$domid}->{$clsid}; + delete $self->{by_name}->{$domid}->{$class->name}; + } +} + +sub get_by_id { + my $self = shift; + my ($dmid, $id) = $self->_find_ids(@_); + return $self->{by_id}->{$dmid}->{$id}; +} + +sub get_by_name { + my $self = shift; + my ($dmid, $name) = $self->_find_ids(@_); + return $self->{by_name}->{$dmid}->{$name}; +} + +sub get_ids { + my $self = shift; + my ($dmid) = $self->_find_ids(@_); + return keys %{$self->{by_id}->{$dmid}}; +} + +sub get_names { + my $self = shift; + my ($dmid) = $self->_find_ids(@_); + return keys %{$self->{by_name}->{$dmid}}; +} + +sub get_all { + my $self = shift; + my ($dmid) = $self->_find_ids(@_); + return values %{$self->{by_id}->{$dmid}}; +} + +sub map_by_id { + my $self = shift; + my ($dmid) = $self->_find_ids(@_); + my $set = $self->{by_id}->{$dmid}; + return { map { $_ => $set->{$_} } keys %{$set} }; +} + +sub map_by_name { + my $self = shift; + my ($dmid) = $self->_find_ids(@_); + my $set = $self->{by_name}->{$dmid}; + return { map { $_ => $set->{$_} } keys %{$set} }; +} + +1; diff --git a/lib/MogileFS/Factory/Device.pm b/lib/MogileFS/Factory/Device.pm new file mode 100644 index 00000000..c9cd54b7 --- /dev/null +++ b/lib/MogileFS/Factory/Device.pm @@ -0,0 +1,14 @@ +package MogileFS::Factory::Device; +use strict; +use warnings; +use base 'MogileFS::Factory'; + +use MogileFS::Device; + +sub set { + my ($self, $args) = @_; + my $hostfactory = MogileFS::Factory::Host->get_factory; + return $self->SUPER::set(MogileFS::Device->new_from_args($args, $hostfactory)); +} + +1; 
diff --git a/lib/MogileFS/Factory/Domain.pm b/lib/MogileFS/Factory/Domain.pm new file mode 100644 index 00000000..36cee74d --- /dev/null +++ b/lib/MogileFS/Factory/Domain.pm @@ -0,0 +1,24 @@ +package MogileFS::Factory::Domain; +use strict; +use warnings; +use base 'MogileFS::Factory'; + +use MogileFS::Domain; + +sub set { + my ($self, $args) = @_; + my $classfactory = MogileFS::Factory::Class->get_factory; + my $dom = $self->SUPER::set(MogileFS::Domain->new_from_args($args, $classfactory)); + + # Stupid awkward classes have a magic "default" + # If it exists in the DB, it will be overridden. + my $cls = $classfactory->get_by_id($dom->id, 0); + unless ($cls) { + $classfactory->set({ dmid => $dom->id, classid => 0, + classname => 'default', + mindevcount => MogileFS->config('default_mindevcount')}); + } + return $dom; +} + +1; diff --git a/lib/MogileFS/Factory/Host.pm b/lib/MogileFS/Factory/Host.pm new file mode 100644 index 00000000..4c8c1e3c --- /dev/null +++ b/lib/MogileFS/Factory/Host.pm @@ -0,0 +1,14 @@ +package MogileFS::Factory::Host; +use strict; +use warnings; +use base 'MogileFS::Factory'; + +use MogileFS::Host; + +sub set { + my ($self, $args) = @_; + my $devfactory = MogileFS::Factory::Device->get_factory; + return $self->SUPER::set(MogileFS::Host->new_from_args($args, $devfactory)); +} + +1; diff --git a/lib/MogileFS/HTTPFile.pm b/lib/MogileFS/HTTPFile.pm index 2b4f307b..f1734bc7 100644 --- a/lib/MogileFS/HTTPFile.pm +++ b/lib/MogileFS/HTTPFile.pm @@ -2,15 +2,11 @@ package MogileFS::HTTPFile; use strict; use warnings; use Carp qw(croak); -use Socket qw(PF_INET IPPROTO_TCP SOCK_STREAM); +use Digest; +use MogileFS::Server; use MogileFS::Util qw(error undeferr wait_for_readability wait_for_writeability); -# (caching the connection used for HEAD requests) -my %http_socket; # host:port => [$pid, $time, $socket] - -# get size of file, return 0 on error. -# tries to finish in 2.5 seconds, under the client's default 3 second timeout. 
(configurable) -my %last_stream_connect_error; # host => $hirestime. +my %sidechannel_nexterr; # host => next error log time # create a new MogileFS::HTTPFile instance from a URL. not called # "new" because I don't want to imply that it's creating anything. @@ -45,7 +41,7 @@ sub host_id { # return MogileFS::Device object sub device { my $self = shift; - return MogileFS::Device->of_devid($self->device_id); + return Mgd::device_factory()->get_by_id($self->device_id); } # return MogileFS::Host object @@ -59,238 +55,203 @@ sub delete { my $self = shift; my %opts = @_; my ($host, $port) = ($self->{host}, $self->{port}); + my %http_opts = ( port => $port ); + my $res; - my $httpsock = IO::Socket::INET->new(PeerAddr => $host, PeerPort => $port, Timeout => 2) - or die "can't connect to $host:$port in 2 seconds"; - - $httpsock->write("DELETE $self->{uri} HTTP/1.0\r\nConnection: keep-alive\r\n\r\n"); - - my $keep_alive = 0; - my $did_del = 0; - - while (defined (my $line = <$httpsock>)) { - $line =~ s/[\s\r\n]+$//; - last unless length $line; - if ($line =~ m!^HTTP/\d+\.\d+\s+(\d+)!) 
{ - my $rescode = $1; - # make sure we get a good response - if ($rescode == 404 && $opts{ignore_missing}) { - $did_del = 1; - next; - } - unless ($rescode == 204) { - delete $http_socket{"$host:$port"}; - die "Bad response from $host:$port: [$line]"; - } - $did_del = 1; - next; - } - die "Unexpected HTTP response line during DELETE from $host:$port: [$line]" unless $did_del; - } - die "Didn't get valid HTTP response during DELETE from $host:port" unless $did_del; + $self->host->http("DELETE", $self->{uri}, \%http_opts, sub { ($res) = @_ }); + + Danga::Socket->SetPostLoopCallback(sub { !defined $res }); + Danga::Socket->EventLoop; - return 1; + if ($res->code == 204 || ($res->code == 404 && $opts{ignore_missing})) { + return 1; + } + my $line = $res->status_line; + die "Bad response on DELETE $self->{url}: [$line]"; } -# returns size of file, (doing a HEAD request and looking at content-length, or side-channel to mogstored) -# returns 0 on file missing (404 or -1 from sidechannel), +# returns size of file, (doing a HEAD request and looking at content-length) +# returns -1 on file missing (404), # returns undef on connectivity error +# +# If an optional callback is supplied, the return value is given to the +# callback. 
+# +# workers running Danga::Socket->EventLoop must supply a callback +# workers NOT running Danga::Socket->EventLoop must not supply a callback use constant FILE_MISSING => -1; sub size { - my $self = shift; - my ($host, $port, $uri, $path) = map { $self->{$_} } qw(host port uri url); + my ($self, $cb) = @_; + my %opts = ( port => $self->{port} ); - # don't SIGPIPE us - my $flag_nosignal = MogileFS::Sys->flag_nosignal; - local $SIG{'PIPE'} = "IGNORE" unless $flag_nosignal; + if ($cb) { # run asynchronously + if (defined $self->{_size}) { + Danga::Socket->AddTimer(0, sub { $cb->($self->{_size}) }); + } else { + $self->host->http("HEAD", $self->{uri}, \%opts, sub { + $cb->($self->on_size_response(@_)); + }); + } + return undef; + } else { # run synchronously + return $self->{_size} if defined $self->{_size}; - # setup for sending size request to cached host - my $req = "size $uri\r\n"; - my $reqlen = length $req; - my $rv = 0; + my $res; + $self->host->http("HEAD", $self->{uri}, \%opts, sub { ($res) = @_ }); - my $mogconn = $self->host->mogstored_conn; - my $sock = $mogconn->sock_if_connected; - - my $start_time = Time::HiRes::time(); - - my $httpsock; - my $start_connecting_to_http = sub { - return if $httpsock; # don't allow starting connecting twice - - # try to reuse cached socket - if (my $cached = $http_socket{"$host:$port"}) { - my ($pid, $conntime, $cachesock) = @{ $cached }; - # see if it's still connected - if ($pid == $$ && getpeername($cachesock) && - $conntime > $start_time - 15 && - # readability would indicated conn closed, or garbage: - ! 
wait_for_readability(fileno($cachesock), 0.00)) - { - $httpsock = $cachesock; - return; - } - } + Danga::Socket->SetPostLoopCallback(sub { !defined $res }); + Danga::Socket->EventLoop; - socket $httpsock, PF_INET, SOCK_STREAM, IPPROTO_TCP; - IO::Handle::blocking($httpsock, 0); - connect $httpsock, Socket::sockaddr_in($port, Socket::inet_aton($host)); - }; + return $self->on_size_response($res); + } +} - # sub to parse the response from $sock. returns undef on error, - # or otherwise the size of the $path in bytes. - my $node_timeout = MogileFS->config("node_timeout"); - my $stream_response_timeout = 1.0; - my $read_timed_out = 0; - - # returns defined on a real answer (0 = file missing, >0 = file length), - # returns undef on connectivity problems. - my $parse_response = sub { - # give the socket 1 second to become readable until we get - # scared of no reply and start connecting to HTTP to do a HEAD - # request. if both timeout, we know the machine is gone, but - # we don't want to wait 2 seconds + 2 seconds... prefer to do - # connects in parallel to reduce overall latency. - unless (wait_for_readability(fileno($sock), $stream_response_timeout)) { - $start_connecting_to_http->(); - # give the socket its final time to get to 2 seconds - # before we really give up on it - unless (wait_for_readability(fileno($sock), $node_timeout - $stream_response_timeout)) { - $read_timed_out = 1; - close($sock); - return undef; - } +sub on_size_response { + my ($self, $res) = @_; + + if ($res->is_success) { + my $size = $res->header('content-length'); + if (! defined $size && + $res->header('server') =~ m/^lighttpd/) { + # lighttpd 1.4.x (main release) does not return content-length for + # 0 byte files. + $self->{_size} = 0; + return 0; } - - # now we know there's readable data (pseudo-gross: we assume - # if we were readable, the whole line is ready. this is a - # poor mix of low-level IO and buffered, blocking stdio but in - # practice it works...) 
- my $line = <$sock>; - return undef unless defined $line; - return undef unless $line =~ /^(\S+)\s+(-?\d+)/; # expected format: "uri size" - return undeferr("get_file_size() requested size of $path, got back size of $1 ($2 bytes)") - if $1 ne $uri; - # backchannel sends back -1 on non-existent file, which we map to the defined value '0' - return FILE_MISSING if $2 < 0; - # otherwise, return byte size of file - return $2+0; - }; - - my $conn_timeout = MogileFS->config("conn_timeout") || 2; - - # try using the cached socket - if ($sock) { - $rv = send($sock, $req, $flag_nosignal); - if ($!) { - $mogconn->mark_dead; - } elsif ($rv != $reqlen) { - # FIXME: perhaps we shouldn't error here, but instead - # treat the cached socket as bogus and reconnect? never - # seen that happen, though. - return undeferr("send() didn't return expected length ($rv, not $reqlen) for $path"); - } else { - # success - my $size = $parse_response->(); - return $size if defined $size; - $mogconn->mark_dead; + $self->{_size} = $size; + return $size; + } else { + if ($res->code == 404) { + return FILE_MISSING; } + return undeferr("Failed HEAD check for $self->{url} (" . $res->code . "): " + . $res->message); } - # try creating a connection to the stream - elsif (($last_stream_connect_error{$host} ||= 0) < $start_time - 15.0) - { - $sock = $mogconn->sock($conn_timeout); - - if ($sock) { - $rv = send($sock, $req, $flag_nosignal); - if ($!) { - return undeferr("error talking to mogstored stream ($path): $!"); - } elsif ($rv != $reqlen) { - return undeferr("send() didn't return expected length ($rv, not $reqlen) for $path"); - } else { - # success - my $size = $parse_response->(); - return $size if defined $size; - $mogconn->mark_dead; - } - } else { - # see if we timed out connecting. 
- my $elapsed = Time::HiRes::time() - $start_time; - if ($elapsed > $conn_timeout - 0.2) { - return undeferr("node $host seems to be down in get_file_size"); - } else { - # cache that we can't connect to the mogstored stream - # port for people using only apache/lighttpd (dav) on - # the storage nodes - $last_stream_connect_error{$host} = Time::HiRes::time(); - } +} - } +sub digest_mgmt { + my ($self, $alg, $ping_cb, $reason) = @_; + my $mogconn = $self->host->mogstored_conn; + my $node_timeout = MogileFS->config("node_timeout"); + my $sock; + my $rv; + my $expiry; + + # assuming the storage node can checksum at >=2MB/s, low expectations here + my $response_timeout = $self->size / (2 * 1024 * 1024); + if ($reason && $reason eq "fsck") { + # fsck has low priority in mogstored and is concurrency-limited, + # so this may be queued indefinitely behind digest requests for + # large files + $response_timeout += 3600; + } else { + # account for disk/network latency: + $response_timeout += $node_timeout; } - # failure case: use a HEAD request to get the size of the file: - # give them 2 seconds to connect to server, unless we'd already timed out earlier - my $time_remain = 2.5 - (Time::HiRes::time() - $start_time); - return undeferr("timed out on stream size check of $path, not doing HEAD") - if $time_remain <= 0; + $reason = defined($reason) ? 
" $reason" : ""; + my $uri = $self->{uri}; + my $req = "$alg $uri$reason\r\n"; + my $reqlen = length $req; - # try HTTP (this will only work once anyway, if we already started above) - $start_connecting_to_http->(); + # a dead/stale socket may not be detected until we try to recv on it + # after sending a request + my $retries = 2; + + my $host = $self->{host}; + +retry: + $sock = eval { $mogconn->sock($node_timeout) }; + if (defined $sock) { + delete $sidechannel_nexterr{$host}; + } else { + # avoid flooding logs with identical messages + my $err = $@; + my $next = $sidechannel_nexterr{$host} || 0; + my $now = time(); + return if $now < $next; + $sidechannel_nexterr{$host} = $now + 300; + return undeferr("sidechannel failure on $alg $uri: $err"); + } - # did we timeout? - unless (wait_for_writeability(fileno($httpsock), $time_remain)) { - if (my $worker = MogileFS::ProcManager->is_child) { - $worker->broadcast_host_unreachable($self->host_id); + $rv = send($sock, $req, 0); + if ($! || $rv != $reqlen) { + my $err = $!; + $mogconn->mark_dead; + if ($retries-- <= 0) { + $req =~ tr/\r\n//d; + $err = $err ? "send() error ($req): $err" : + "short send() ($req): $rv != $reqlen"; + $err = $mogconn->{ip} . ":" . $mogconn->{port} . " $err"; + return undeferr($err); } - return undeferr("get_file_size() connect timeout for HTTP HEAD for size of $path"); + goto retry; } - # did we fail to connect? 
(got a RST, etc) - unless (getpeername($httpsock)) { - if (my $worker = MogileFS::ProcManager->is_child) { - $worker->broadcast_device_unreachable($self->device_id); - } - return undeferr("get_file_size() connect failure for HTTP HEAD for size of $path"); + $expiry = Time::HiRes::time() + $response_timeout; + while (!wait_for_readability(fileno($sock), 1.0) && + (Time::HiRes::time() < $expiry)) { + $ping_cb->(); } - $time_remain = 2.5 - (Time::HiRes::time() - $start_time); - return undeferr("no time remaining to write HEAD request to $path") if $time_remain <= 0; - - $rv = syswrite($httpsock, "HEAD $uri HTTP/1.0\r\nConnection: keep-alive\r\n\r\n"); - # FIXME: we don't even look at $rv ? - return undeferr("get_file_size() read timeout ($time_remain) for HTTP HEAD for size of $path") - unless wait_for_readability(fileno($httpsock), $time_remain); - - my $first = <$httpsock>; - return undeferr("get_file_size()'s HEAD request hung up on us") - unless $first; - my ($code) = $first =~ m!^HTTP/1\.\d (\d\d\d)! or - return undeferr("HEAD response to get_file_size looks bogus"); - return FILE_MISSING if $code == 404; - return undeferr("get_file_size()'s HEAD request wasn't a 200 OK, got: $code") - unless $code == 200; - - # FIXME: this could block too probably, if we don't get a whole - # line. in practice, all headers will come at once, though in same packet/read. - my $cl = undef; - my $keep_alive = 0; - while (defined (my $line = <$httpsock>)) { - if ($line eq "\r\n") { - if ($keep_alive) { - $http_socket{"$host:$port"} = [ $$, Time::HiRes::time(), $httpsock ]; - } else { - delete $http_socket{"$host:$port"}; - } - return $cl; - } - $cl = $1 if $line =~ /^Content-length: (\d+)/i; - $keep_alive = 1 if $line =~ /^Connection:.+\bkeep-alive\b/i; + $rv = <$sock>; + if (! 
$rv) { + $mogconn->mark_dead; + return undeferr("EOF from mogstored") if ($retries-- <= 0); + goto retry; + } elsif ($rv =~ /^\Q$uri\E \Q$alg\E=([a-f0-9]{32,128})\r\n/) { + my $hexdigest = $1; + + my $checksum = eval { + MogileFS::Checksum->from_string(0, "$alg:$hexdigest") + }; + return undeferr("$alg failed for $uri: $@") if $@; + return $checksum->{checksum}; + } elsif ($rv =~ /^\Q$uri\E \Q$alg\E=-1\r\n/) { + # FIXME, this could be another error like EMFILE/ENFILE + return FILE_MISSING; + } elsif ($rv =~ /^ERROR /) { + return; # old server, fallback to HTTP } - delete $http_socket{"$host:$port"}; - # no content length found? - return undeferr("get_file_size() found no content-length header in response for $path"); + chomp($rv); + return undeferr("mogstored failed to handle ($alg $uri): $rv"); +} + +sub digest_http { + my ($self, $alg, $ping_cb) = @_; + + my $digest = Digest->new($alg); + my %opts = ( + port => $self->{port}, + # default (4K) is tiny, use 1M like replicate + read_size_hint => 0x100000, + content_cb => sub { + $digest->add($_[0]); + $ping_cb->(); + }, + ); + + my $res; + $self->host->http("GET", $self->{uri}, \%opts, sub { ($res) = @_ }); + + # TODO: async interface for workers already running Danga::Socket->EventLoop + Danga::Socket->SetPostLoopCallback(sub { !defined $res }); + Danga::Socket->EventLoop; + + return $digest->digest if $res->is_success; + return FILE_MISSING if $res->code == 404; + return undeferr("Failed $alg (GET) check for $self->{url} (" . $res->code . "): " + . 
$res->message); } +sub digest { + my ($self, $alg, $ping_cb, $reason) = @_; + my $digest = $self->digest_mgmt($alg, $ping_cb, $reason); + + return $digest if ($digest && $digest ne FILE_MISSING); + + $self->digest_http($alg, $ping_cb); +} 1; diff --git a/lib/MogileFS/Host.pm b/lib/MogileFS/Host.pm index c49c1e74..76c1c5a0 100644 --- a/lib/MogileFS/Host.pm +++ b/lib/MogileFS/Host.pm @@ -1,246 +1,98 @@ package MogileFS::Host; use strict; use warnings; +use MogileFS::Util qw(throw); use Net::Netmask; use Carp qw(croak); use MogileFS::Connection::Mogstored; +use MogileFS::Connection::HTTP; +use MogileFS::ConnectionPool; +our $http_pool; -my %singleton; # hostid -> instance -my $last_load = 0; # unixtime of last 'reload_hosts' -my $all_loaded = 0; # bool: have we loaded all the hosts? +=head1 -# returns MogileFS::Host object, or throws 'dup' error -sub create { - my ($pkg, $hostname, $ip) = @_; - my $hid = Mgd::get_store()->create_host($hostname, $ip); - return MogileFS::Host->of_hostid($hid); -} - -sub t_wipe_singletons { - %singleton = (); -} - -sub of_hostid { - my ($class, $hostid) = @_; - return undef unless $hostid; - return $singleton{$hostid} ||= bless { - hostid => $hostid, - _loaded => 0, - }, $class; -} - -sub of_hostname { - my ($class, $hostname) = @_; - - # reload if it's been awhile - MogileFS::Host->check_cache; - foreach my $host ($class->hosts) { - return $host if $host->{hostname} eq $hostname; - } - - # force a reload - MogileFS::Host->reload_hosts; - foreach my $host ($class->hosts) { - return $host if $host->{hostname} eq $hostname; - } - - return undef; -} - -sub invalidate_cache { - my $class = shift; - - # so next time it's invalid and won't be used old - $last_load = 0; - $all_loaded = 0; - $_->{_loaded} = 0 foreach values %singleton; - - if (my $worker = MogileFS::ProcManager->is_child) { - $worker->invalidate_meta("host"); - } -} - -# force a reload of all host objects. 
-sub reload_hosts { - my $class = shift; - - # mark them all invalid for now, until they're reloaded - foreach my $host (values %singleton) { - $host->{_loaded} = 0; - } - - my $sto = Mgd::get_store(); - foreach my $row ($sto->get_all_hosts) { - die unless $row->{status} =~ /^\w+$/; - my $ho = - MogileFS::Host->of_hostid($row->{hostid}); - $ho->absorb_dbrow($row); - } - - # get rid of ones that could've gone away: - foreach my $hostid (keys %singleton) { - my $host = $singleton{$hostid}; - delete $singleton{$hostid} unless $host->{_loaded} - } - - $all_loaded = 1; - $last_load = time(); -} - -# reload host objects if it hasn't been done in last 5 seconds -sub check_cache { - my $class = shift; - my $now = time(); - return if $last_load > $now - 5; - MogileFS::Host->reload_hosts; -} - -sub hosts { - my $class = shift; - $class->reload_hosts unless $all_loaded; - return values %singleton; -} +MogileFS::Host - host class -# -------------------------------------------------------------------------- +=cut -sub id { $_[0]{hostid} } -sub hostid { $_[0]{hostid} } +# Centralized here instead of three places. +my @observed_fields = qw/observed_state/; +my @fields = (qw/hostid hostname hostip status http_port http_get_port altip altmask/, + @observed_fields); -sub absorb_dbrow { - my ($host, $hashref) = @_; - foreach my $k (qw(status hostname hostip http_port http_get_port altip altmask)) { - $host->{$k} = $hashref->{$k}; - } - $host->{mask} = - ($host->{altip} && $host->{altmask}) ? - Net::Netmask->new2($host->{altmask}) : - undef; - - $host->{_loaded} = 1; -} - -sub set_observed_state { - my ($host, $state) = @_; - croak "set_observed_state() with invalid host state '$state', valid: reachable, unreachable" - if $state !~ /^(?:reachable|unreachable)$/; - $host->{observed_state} = $state; -} +# TODO: Validate a few things: state, observed state. 
+sub new_from_args { + my ($class, $args, $dev_factory) = @_; + my $self = bless { + dev_factory => $dev_factory, + %{$args}, + }, $class; -sub observed_reachable { - my $host = shift; - return $host->{observed_state} && $host->{observed_state} eq "reachable"; -} + $self->{mask} = ($self->{altip} && $self->{altmask}) ? + Net::Netmask->new2($self->{altmask}) : undef; -sub observed_unreachable { - my $host = shift; - return $host->{observed_state} && $host->{observed_state} eq "unreachable"; + return $self; } -sub set_status { shift->_set_field("status", @_); } -sub set_ip { shift->_set_field("hostip", @_); } # throws 'dup' -sub set_http_port { shift->_set_field("http_port", @_); } -sub set_http_get_port { shift->_set_field("http_get_port", @_); } -sub set_alt_ip { shift->_set_field("altip", @_); } -sub set_alt_mask { shift->_set_field("altmask", @_); } - -# for test suite. set fields in memory, without a MogileFS::Store -sub t_init { - my $self = shift; - my $status = shift; - # TODO: once we have a MogileFS::HostState, update this to - # validate it. not so important for now, though, since - # typos in tests will just make tests fail. 
- $self->{status} = $status; - $self->{_loaded} = 1; - $self->{observed_state} = "reachable"; +sub valid_state { + my ($class, $state) = @_; + return $state && $state =~ /\A(?:alive|dead|down|readonly)\z/; } -sub _set_field { - my ($self, $field, $val) = @_; - # $field is both the database column field and our member keys - $self->_load; - return 1 if $self->{$field} eq $val; - return 0 unless Mgd::get_store()->update_host_property($self->id, $field, $val); - $self->{$field} = $val; - MogileFS::Host->invalidate_cache; - return 1; -} +# Instance methods: -sub http_port { - my $host = shift; - $host->_load; - return $host->{http_port}; -} +sub id { $_[0]{hostid} } +sub name { $_[0]{hostname} } +sub hostname { $_[0]{hostname} } +sub hostip { $_[0]{hostip} } +sub status { $_[0]{status} } +sub http_port { $_[0]{http_port} } sub http_get_port { - my $host = shift; - $host->_load; - return $host->{http_get_port} || $host->{http_port}; + return $_[0]->{http_get_port} || $_[0]->{http_port}; } sub ip { - my $host = shift; - $host->_load; - if ($host->{mask} && $host->{altip} && + my $self = shift; + if ($self->{mask} && $self->{altip} && ($MogileFS::REQ_altzone || ($MogileFS::REQ_client_ip && - $host->{mask}->match($MogileFS::REQ_client_ip)))) { - return $host->{altip}; + $self->{mask}->match($MogileFS::REQ_client_ip)))) { + return $self->{altip}; } else { - return $host->{hostip}; + return $self->{hostip}; } } -sub field { - my ($host, $k) = @_; - $host->_load; - # TODO: validate $k to be in certain set of allowed keys? - return $host->{$k}; -} - -sub status { - my $host = shift; - $host->_load; - return $host->{status}; +sub fields { + my $self = shift; + my @tofetch = @_ ? 
@_ : @fields; + return { map { $_ => $self->{$_} } @tofetch }; } -sub hostname { - my $host = shift; - $host->_load; - return $host->{hostname}; +sub observed_fields { + return $_[0]->fields(@observed_fields); } -sub should_get_new_files { - my $host = shift; - return $host->status eq "alive"; +sub alive { + return $_[0]->status eq 'alive'; } -sub is_marked_down { - my $host = shift; - die "FIXME"; - # ... +sub readonly { + return $_[0]->status eq 'readonly'; } -sub exists { - my $host = shift; - $host->_try_load; - return $host->{_loaded}; +sub should_read_from { + return $_[0]->alive || $_[0]->readonly; } -sub overview_hashref { - my $host = shift; - $host->_load; - my $ret = {}; - foreach my $k (qw(hostid status http_port http_get_port hostname hostip altip altmask)) { - $ret->{$k} = $host->{$k}; - } - return $ret; +sub observed_reachable { + my $self = shift; + return $self->{observed_state} && $self->{observed_state} eq 'reachable'; } -sub delete { - my $host = shift; - my $rv = Mgd::get_store()->delete_host($host->id); - MogileFS::Host->invalidate_cache; - return $rv; +sub observed_unreachable { + my $self = shift; + return $self->{observed_state} && $self->{observed_state} eq 'unreachable'; } # returns/creates a MogileFS::Connection::Mogstored object to the @@ -259,33 +111,62 @@ sub sidechannel_port { MogileFS->config("mogstored_stream_port"); } -# class method -sub valid_state { - my ($class, $state) = @_; - return $state && $state =~ /^alive|dead|down$/; +# starts an HTTP request on the given $port with $method to $path +# Calls cb with an HTTP::Response object when done +sub _http_conn { + my ($self, $port, $method, $path, $opts, $cb) = @_; + _init_pools(); + + $http_pool->start($opts->{ip} || $self->ip, $port, sub { + $_[0]->start($method, $path, $opts, $cb); + }); } -# class method. valid host state, for newly created hosts? -# currently equal to valid_state. 
-sub valid_initial_state { - my ($class, $state) = @_; - return $class->valid_state($state); +# Returns a ready, blocking HTTP connection +# This is only used by replicate +sub http_conn_get { + my ($self, $opts) = @_; + my $ip = $opts->{ip} || $self->ip; + my $port = $opts->{port} || $self->http_port; + + _init_pools(); + my $conn = $http_pool->conn_get($ip, $port); + $conn->sock->blocking(1) if $conn; + return $conn; } -# -------------------------------------------------------------------------- +# Returns a blocking HTTP connection back to the pool. +# This is the inverse of http_conn_get, and should be called when +# done using a connection (iff the connection is really still alive) +# (and makes it non-blocking for future use) +# This is only used by replicate. +sub http_conn_put { + my ($self, $conn) = @_; + $conn->sock->blocking(0); + $http_pool->conn_put($conn); +} -sub _load { - return if $_[0]{_loaded}; - MogileFS::Host->reload_hosts; - return if $_[0]{_loaded}; - my $host = shift; - croak "Host $host->{hostid} doesn't exist.\n"; +sub http_get { + my ($self, $method, $path, $opts, $cb) = @_; + $opts ||= {}; + $self->_http_conn($self->http_get_port, $method, $path, $opts, $cb); } -sub _try_load { - return if $_[0]{_loaded}; - MogileFS::Host->reload_hosts; +sub http { + my ($self, $method, $path, $opts, $cb) = @_; + $opts ||= {}; + my $port = delete $opts->{port} || $self->http_port; + $self->_http_conn($port, $method, $path, $opts, $cb); } +# FIXME - make these customizable +sub _init_pools { + return if $http_pool; + my $opts = { + total_capacity => MogileFS->config("conn_pool_size"), + }; + + $http_pool = MogileFS::ConnectionPool->new("MogileFS::Connection::HTTP", $opts); +} 1; diff --git a/lib/MogileFS/IOStatWatcher.pm b/lib/MogileFS/IOStatWatcher.pm index b6502aa8..3078cd14 100644 --- a/lib/MogileFS/IOStatWatcher.pm +++ b/lib/MogileFS/IOStatWatcher.pm @@ -97,6 +97,7 @@ sub got_disconnect { # Support class that does the communication with individual 
hosts. package MogileFS::IOStatWatch::Client; +use Socket qw(SO_KEEPALIVE); use strict; use warnings; @@ -117,6 +118,7 @@ sub new { ); return unless $sock; + $sock->sockopt(SO_KEEPALIVE, 1); $self = fields::new($self) unless ref $self; $self->SUPER::new($sock); $self->watch_write(1); diff --git a/lib/MogileFS/ProcManager.pm b/lib/MogileFS/ProcManager.pm index 5768c639..2c580f90 100644 --- a/lib/MogileFS/ProcManager.pm +++ b/lib/MogileFS/ProcManager.pm @@ -6,6 +6,7 @@ use Symbol; use Socket; use MogileFS::Connection::Client; use MogileFS::Connection::Worker; +use MogileFS::Util qw(apply_state_events); # This class handles keeping lists of workers and clients and # assigning them to each other when things happen. You don't actually @@ -37,17 +38,28 @@ my %child = (); # pid -> MogileFS::Connection::Worker my %todie = (); # pid -> 1 (lists pids that we've asked to die) my %jobs = (); # jobname -> [ min, current ] +# we start job_master after monitor has run, but this avoids undef warning +# in job_needs_reduction +$jobs{job_master} = [ 0, 0 ]; + our $allkidsup = 0; # if true, all our kids are running. set to 0 when a kid dies. 
my @prefork_cleanup; # subrefs to run to clean stuff up before we make a new child *error = \&Mgd::error; -my %dev_util; # devid -> utilization -my $last_util_spray = 0; # time we lost spread %dev_util to children +my $monitor_good = 0; # ticked after monitor executes once after startup my $nowish; # updated approximately once per second +# it's pointless to spawn certain jobs without a job_master +my $want_job_master; +my %needs_job_master = ( + delete => 1, + fsck => 1, + replicate => 1, +); + sub push_pre_fork_cleanup { my ($class, $code) = @_; push @prefork_cleanup, $code; @@ -135,12 +147,14 @@ sub PostEventLoopChecker { my $lastspawntime = 0; # time we last ran spawn_children sub return sub { + MogileFS::Connection::Client->ProcessPipelined; # run only once per second $nowish = time(); return 1 unless $nowish > $lastspawntime; $lastspawntime = $nowish; MogileFS::ProcManager->WatchDog; + MogileFS::Connection::Client->WriterWatchDog; # see if anybody has died, but don't hang up on doing so while(my $pid = waitpid -1, WNOHANG) { @@ -173,6 +187,10 @@ sub PostEventLoopChecker { # foreach job, fork enough children while (my ($job, $jobstat) = each %jobs) { + + # do not spawn job_master-dependent workers if we have no job_master + next if (! $want_job_master && $needs_job_master{$job}); + my $need = $jobstat->[0] - $jobstat->[1]; if ($need > 0) { error("Job $job has only $jobstat->[1], wants $jobstat->[0], making $need."); @@ -212,7 +230,7 @@ sub make_new_child { or return error("Can't block SIGINT for fork: $!"); socketpair(my $parents_ipc, my $childs_ipc, AF_UNIX, SOCK_STREAM, PF_UNSPEC ) - or die( "Sockpair failed" ); + or die( "socketpair failed: $!" 
); return error("fork failed creating $job: $!") unless defined ($pid = fork); @@ -236,6 +254,9 @@ sub make_new_child { return $worker_conn; } + # let children have different random number seeds + srand(); + # as a child, we want to close these and ignore them $_->() foreach @prefork_cleanup; close($parents_ipc); @@ -298,6 +319,10 @@ sub foreach_pending_query { } } +sub is_monitor_good { + return $monitor_good; +} + sub is_valid_job { my ($class, $job) = @_; return defined $jobs{$job}; @@ -310,7 +335,9 @@ sub valid_jobs { sub request_job_process { my ($class, $job, $n) = @_; return 0 unless $class->is_valid_job($job); - return 0 if $job eq 'job_master' && $n > 1; # ghetto special case + return 0 if ($job =~ /^(?:job_master|monitor)$/i && $n > 1); # ghetto special case + + $want_job_master = $n if ($job eq "job_master"); $jobs{$job}->[0] = $n; $allkidsup = 0; @@ -337,9 +364,15 @@ sub SetAsChild { %ErrorsTo = (); %idle_workers = (); %pending_work = (); + %ChildrenByJob = (); + %child = (); + %todie = (); + %jobs = (); - # and now kill off our event loop so that we don't waste time - Danga::Socket->SetPostLoopCallback(sub { return 0; }); + # we just forked from our parent process, also using Danga::Socket, + # so we need to lose all that state and start afresh. + Danga::Socket->Reset; + MogileFS::Connection::Client->Reset; } # called when a child has died. a child is someone doing a job for us, @@ -408,6 +441,10 @@ sub EnqueueCommandRequest { ($client->peer_ip_string || '0.0.0.0') . " $line" ]; MogileFS::ProcManager->ProcessQueues; + if (@PendingQueries) { + # Don't like the name. Feel free to change if you find better. + $Stats{times_out_of_qworkers}++; + } } # puts a worker back in the queue, deleting any outstanding jobs in @@ -524,6 +561,7 @@ sub process_worker_queues { # allow workers to grab a linear range of work. while (@$queue && $worker->wants_todo($job)) { $worker->write(":queue_todo $job " . shift(@$queue) . "\r\n"); + $Stats{'work_sent_to_' . 
$job}++; } next JOB unless @$queue; } @@ -549,7 +587,7 @@ sub ProcessQueues { next unless $clref; # get worker and make sure it's not closed already - my MogileFS::Connection::Worker $worker = shift @IdleQueryWorkers; + my MogileFS::Connection::Worker $worker = pop @IdleQueryWorkers; if (!defined $worker || $worker->{closed}) { unshift @PendingQueries, $clref; next; @@ -620,10 +658,6 @@ sub HandleChildRequest { # pass it on to our error handler, prefaced with the child's job Mgd::debug("[" . $child->job . "(" . $child->pid . ")] $1"); - } elsif ($cmd =~ /^:state_change (\w+) (\d+) (\w+)/) { - my ($what, $whatid, $state) = ($1, $2, $3); - state_change($what, $whatid, $state, $child); - } elsif ($cmd =~ /^queue_depth (\w+)/) { my $job = $1; if ($job eq 'all') { @@ -676,30 +710,27 @@ sub HandleChildRequest { } elsif ($cmd eq ":still_alive") { # a no-op + } elsif ($cmd =~ /^:monitor_events/) { + # Apply the state locally, so when we fork children they have a + # pre-parsed factory. + # We do not replay the events back to where it came, since this + # severely impacts startup performance for instances with several + # thousand domains, classes, hosts or devices. + apply_state_events(\$cmd); + MogileFS::ProcManager->send_to_all_children($cmd, $child); + } elsif ($cmd eq ":monitor_just_ran") { send_monitor_has_run($child); } elsif ($cmd =~ /^:wake_a (\w+)$/) { MogileFS::ProcManager->wake_a($1, $child); - - } elsif ($cmd =~ /^:invalidate_meta (\w+)/) { - - my $what = $1; - MogileFS::ProcManager->send_to_all_children(":invalidate_meta_once $what", $child); - } elsif ($cmd =~ /^:set_config_from_child (\S+) (.+)/) { # and this will rebroadcast it to all other children # (including the one that just set it to us, but eh) MogileFS::Config->set_config($1, $2); - } elsif (my ($devid, $util) = $cmd =~ /^:set_dev_utilization (\d+) (.+)/) { - $dev_util{$devid} = $util; - - # time to rebroadcast dev utilization messages to all children? 
- if ($nowish > $last_util_spray + 3) { - $last_util_spray = $nowish; - MogileFS::ProcManager->send_to_all_children(":set_dev_utilization " . join(" ", %dev_util)); - } + } elsif ($cmd =~ /^:refresh_monitor$/) { + MogileFS::ProcManager->ImmediateSendToChildrenByJob("monitor", $cmd); } else { # unknown command my $show = $cmd; @@ -777,6 +808,16 @@ sub note_pending_death { # see if we should reduce the number of active children sub job_needs_reduction { my $job = shift; + my $q; + + # drop job_master-dependent workers if there is no job_master and no + # previously queued work + if (!$want_job_master && $needs_job_master{$job} + && $jobs{job_master}->[1] == 0 # check if job_master is really dead + && (($q = $pending_work{$job}) && !@$q || !$q)) { + return 1; + } + return $jobs{$job}->[0] < $jobs{$job}->[1]; } @@ -784,21 +825,6 @@ sub is_child { return $IsChild; } -sub state_change { - my ($what, $whatid, $state, $exclude) = @_; - my $key = "$what-$whatid"; - my $now = time(); - foreach my $child (values %child) { - my $old = $child->{known_state}{$key} || ""; - if (!$old || $old->[1] ne $state || $old->[0] < $now - 300) { - $child->{known_state}{$key} = [$now, $state]; - - $child->write(":state_change $what $whatid $state\r\n") - unless $exclude && $child == $exclude; - } - } -} - sub wake_a { my ($pkg, $class, $fromchild) = @_; # from arg is optional (which child sent it) my $child = MogileFS::ProcManager->is_child; @@ -813,13 +839,28 @@ sub send_to_all_children { my ($pkg, $msg, $exclude) = @_; foreach my $child (values %child) { next if $exclude && $child == $exclude; - $child->write("$msg\r\n"); + $child->write($msg . "\r\n"); } } sub send_monitor_has_run { my $child = shift; - for my $type (qw(replicate fsck queryworker delete)) { + # Gas up other workers if monitor's completed for the first time. + if (! 
$monitor_good) { + MogileFS::ProcManager->set_min_workers('queryworker' => MogileFS->config('query_jobs')); + MogileFS::ProcManager->set_min_workers('delete' => MogileFS->config('delete_jobs')); + MogileFS::ProcManager->set_min_workers('replicate' => MogileFS->config('replicate_jobs')); + MogileFS::ProcManager->set_min_workers('reaper' => MogileFS->config('reaper_jobs')); + MogileFS::ProcManager->set_min_workers('fsck' => MogileFS->config('fsck_jobs')); + + # only one job_master at most + $want_job_master = !!MogileFS->config('job_master'); + MogileFS::ProcManager->set_min_workers('job_master' => $want_job_master); + + $monitor_good = 1; + $allkidsup = 0; + } + for my $type (qw(queryworker)) { MogileFS::ProcManager->ImmediateSendToChildrenByJob($type, ":monitor_has_run", $child); } } diff --git a/lib/MogileFS/Rebalance.pm b/lib/MogileFS/Rebalance.pm index 0791abe7..9a17ca9b 100644 --- a/lib/MogileFS/Rebalance.pm +++ b/lib/MogileFS/Rebalance.pm @@ -3,6 +3,8 @@ use strict; use warnings; use Carp qw(croak); use List::Util (); +use MogileFS::Server (); + # Note: The filters aren't written for maximum speed, as they're not likely # in the slow path. They're supposed to be readable/extensible. Please don't # cram them down unless you have to. @@ -51,8 +53,15 @@ my %default_state = ( sdev_current => 0, sdev_lastfid => 0, sdev_limit => 0, + limit => 0, fids_queued => 0, bytes_queued => 0, + time_started => 0, + time_finished => 0, + time_stopped => 0, + time_latest_run => 0, + time_latest_empty_run => 0, + empty_runs => 0, ); sub new { @@ -83,9 +92,27 @@ sub init { # If we don't have an initial source device list, discover them. # Used to filter destination devices later. 
$state{source_devs} = $self->filter_source_devices($devs); + $state{time_started} = time(); $self->{state} = \%state; } +sub stop { + my $self = shift; + my $p = $self->{policy}; + my $s = $self->{state}; + my $sdev = $self->{sdev_current}; + unless ($p->{leave_in_drain_mode}) { + Mgd::get_store()->set_device_state($sdev, 'alive') if $sdev; + } + $s->{time_stopped} = time(); +} + +sub finish { + my $self = shift; + my $s = $self->{state}; + $s->{time_finished} = time(); +} + # Resume from saved as_string state. sub load_state { my $self = shift; @@ -141,6 +168,7 @@ sub _parse_settings { # the constraint also serves as a set of defaults. %parsed = %{$constraint} if ($constraint); + return unless $settings; # parse out from a string: key=value key=value for my $tuple (split /\s/, $settings) { my ($key, $value) = split /=/, $tuple; @@ -189,7 +217,7 @@ sub next_fids_to_rebalance { # If we're not working against a source device, discover one my $sdev = $self->_find_source_device($state->{source_devs}); return undef unless $sdev; - $sdev = MogileFS::Device->of_devid($sdev); + $sdev = Mgd::device_factory()->get_by_id($sdev); my $filtered_destdevs = $self->filter_dest_devices($devs); croak("rebalance cannot find suitable destination devices") @@ -220,6 +248,12 @@ sub next_fids_to_rebalance { push(@devfids, [$fid->id, $sdev->id, $destdevs]); } + $state->{time_latest_run} = time; + unless (@devfids) { + $state->{empty_runs}++; + $state->{time_latest_empty_run} = time; + } + # return block of fiddev combos. return \@devfids; } @@ -275,7 +309,7 @@ sub _choose_dest_devs { my @shuffled_devs = List::Util::shuffle(@$filtered_devs); return \@shuffled_devs if ($p->{use_dest_devs} eq 'all'); - return splice @shuffled_devs, 0, $p->{use_dest_devs}; + return [splice @shuffled_devs, 0, $p->{use_dest_devs}]; } # Iterate through all possible constraints until we have a final list. 
@@ -287,7 +321,7 @@ sub filter_source_devices { my @sdevs = (); for my $dev (@$devs) { - next unless $dev->exists && $dev->can_delete_from; + next unless $dev->can_delete_from; my $id = $dev->id; if (@{$policy->{from_devices}}) { next unless grep { $_ == $id } @{$policy->{from_devices}}; @@ -339,7 +373,7 @@ sub _finish_source_device { # Unless the user wants a device to never get new files again (sticking in # drain mode), return to alive. unless ($policy->{leave_in_drain_mode}) { - MogileFS::Device->of_devid($sdev)->set_state('alive'); + Mgd::get_store()->set_device_state($sdev, 'alive'); } push @{$state->{completed_devs}}, $sdev; } @@ -370,7 +404,7 @@ sub _find_source_device { } } # Must mark device in "drain" mode while we work on it. - MogileFS::Device->of_devid($sdev)->set_state('drain'); + Mgd::get_store()->set_device_state($sdev, 'drain'); $state->{sdev_limit} = $limit; } @@ -406,12 +440,13 @@ sub filter_dest_devices { # skip anything we would source from. # FIXME: ends up not skipping stuff out of completed_devs? :/ - my %sdevs = map { $_ => 1 } @{$state->{source_devs}}; - my @devs = grep { ! $sdevs{$_} } @$devs; + my %sdevs = map { $_ => 1 } @{$state->{source_devs}}, + @{$state->{completed_devs}}, $state->{sdev_current}; + my @devs = grep { ! 
$sdevs{$_->id} } @$devs; my @ddevs = (); for my $dev (@devs) { - next unless $dev->exists && $dev->should_get_new_files; + next unless $dev->should_get_new_files; my $id = $dev->id; my $hostid = $dev->hostid; diff --git a/lib/MogileFS/ReplicationPolicy/MultipleDevices.pm b/lib/MogileFS/ReplicationPolicy/MultipleDevices.pm new file mode 100644 index 00000000..afe4e2b2 --- /dev/null +++ b/lib/MogileFS/ReplicationPolicy/MultipleDevices.pm @@ -0,0 +1,108 @@ +package MogileFS::ReplicationPolicy::MultipleDevices; +use strict; +use base 'MogileFS::ReplicationPolicy'; +use MogileFS::Util qw(weighted_list); +use MogileFS::ReplicationRequest qw(ALL_GOOD TOO_GOOD TEMP_NO_ANSWER); + +sub new { + my ($class, $mindevcount) = @_; + return bless { + mindevcount => $mindevcount, + }, $class; +} + +sub new_from_policy_args { + my ($class, $argref) = @_; + # Note: "MultipleDevices()" is okay, in which case the 'mindevcount' + # on the class is used. (see below) + $$argref =~ s/^\s* \( \s* (\d*) \s* \) \s*//x + or die "$class failed to parse args: $$argref"; + return $class->new($1) +} + +sub mindevcount { $_[0]{mindevcount} } + +sub replicate_to { + my ($self, %args) = @_; + + my $fid = delete $args{fid}; # fid scalar to copy + my $on_devs = delete $args{on_devs}; # arrayref of device objects + my $all_devs = delete $args{all_devs}; # hashref of { devid => MogileFS::Device } + my $failed = delete $args{failed}; # hashref of { devid => 1 } of failed attempts this round + + # this is the per-class mindevcount (the old way), which is passed in automatically + # from the replication worker. but if we have our own configured mindevcount + # in class.replpolicy, like "MultipleHosts(3)", then we use the explicit one. otherwise, + # if blank, or zero, like "MultipleHosts()", then we use the builtin on + my $min = delete $args{min}; + $min = $self->{mindevcount} || $min; + + warn "Unknown parameters: " . 
join(", ", sort keys %args) if %args; + die "Missing parameters" unless $on_devs && $all_devs && $failed && $fid; + + # number of devices we currently live on + my $already_on = @$on_devs; + + return ALL_GOOD if $min == $already_on; + return TOO_GOOD if $already_on > $min; + + # total disks available which are candidates for having files on them + my $total_disks = scalar grep { $_->dstate->should_have_files } values %$all_devs; + + my %on_dev = map { $_->id => 1 } @$on_devs; + + # if we have two copies and that's all the disks there are + # anywhere, be happy enough, even if mindevcount is higher. in + # that case, when they add more disks later, they'll need to fsck + # to make files replicate more. + # this is here instead of above in case an over replication error causes + # the file to be on all disks (where more than necessary) + return ALL_GOOD if $already_on >= 2 && $already_on == $total_disks; + + my @all_dests = sort { + $b->percent_free <=> $a->percent_free + } grep { + ! $on_dev{$_->devid} && + ! $failed->{$_->devid} && + $_->should_get_replicated_files + } values %$all_devs; + + return TEMP_NO_ANSWER unless @all_dests; + + # Do this little dance to only weight-shuffle the top end of empty devices + @all_dests = weighted_list(map { [$_, 100 * $_->percent_free] } + splice(@all_dests, 0, 20)); + + return MogileFS::ReplicationRequest->new( + ideal => \@all_dests, + desperate => [], + ); +} + +1; + +# Local Variables: +# mode: perl +# c-basic-indent: 4 +# indent-tabs-mode: nil +# End: + +__END__ + +=head1 NAME + +MogileFS::ReplicationPolicy::MultipleDevices -- bare-bones replication policy + +=head1 RULES + +This policy only puts files onto different devices. This is intended to be a +quieter alternative to the default MultipleHosts replication policy when hosts +are heavily-imbalanced (one host has much more storage capacity than another). 
+This aims to avoid the noisy "policy_no_suggestions" log messages in clusters +where one large host contains the bulk of the storage. + +=head1 SEE ALSO + +L + +L diff --git a/lib/MogileFS/ReplicationPolicy/MultipleHosts.pm b/lib/MogileFS/ReplicationPolicy/MultipleHosts.pm index bd25f98d..946be0d6 100644 --- a/lib/MogileFS/ReplicationPolicy/MultipleHosts.pm +++ b/lib/MogileFS/ReplicationPolicy/MultipleHosts.pm @@ -44,7 +44,7 @@ sub replicate_to { my $already_on = @$on_devs; # a silly special case, bail out early. - return ALL_GOOD if $min == 1 && $already_on; + return ALL_GOOD if $min == 1 && $already_on == 1; # total disks available which are candidates for having files on them my $total_disks = scalar grep { $_->dstate->should_have_files } values %$all_devs; @@ -97,12 +97,7 @@ sub replicate_to { return TEMP_NO_ANSWER if $already_on >= $min && @ideal == 0; - # Do this little dance to only weight-shuffle the top end of empty devices - # to save CPU. - @ideal = weighted_list(map { [$_, 100 * $_->percent_free] } - splice(@ideal, 0, 20)); - @desp = weighted_list(map { [$_, 100 * $_->percent_free] } - splice(@desp, 0, 20)); + $self->sort_devices(\@ideal, \@desp, $fid); return MogileFS::ReplicationRequest->new( ideal => \@ideal, @@ -121,6 +116,21 @@ sub unique_hosts { return scalar keys %host; } +sub sort_devices { + my ($self, $ideal, $desp, $fid) = @_; + + # Do this little dance to only weight-shuffle the top end of empty devices + # to save CPU. 
+ + @{ $ideal } = weighted_list(map { [$_, 100 * $_->percent_free] } + splice(@{ $ideal }, 0, 20)); + + @{ $desp } = weighted_list(map { [$_, 100 * $_->percent_free] } + splice(@{ $desp }, 0, 20)); + + return; +} + 1; # Local Variables: diff --git a/lib/MogileFS/ReplicationRequest.pm b/lib/MogileFS/ReplicationRequest.pm index 625df014..a5b0828d 100644 --- a/lib/MogileFS/ReplicationRequest.pm +++ b/lib/MogileFS/ReplicationRequest.pm @@ -1,5 +1,6 @@ package MogileFS::ReplicationRequest; use strict; +use MogileFS::Server; require Exporter; our @ISA = qw(Exporter); our @EXPORT_OK = qw(rr_upgrade ALL_GOOD TOO_GOOD TEMP_NO_ANSWER); @@ -26,7 +27,7 @@ sub rr_upgrade { # for ideal replications sub replicate_to { my ($class, @devs) = @_; - @devs = map { ref $_ ? $_ : MogileFS::Device->of_devid($_) } @devs; + @devs = map { ref $_ ? $_ : Mgd::device_factory()->get_by_id($_) } @devs; return bless { ideal_next => \@devs, }, $class; diff --git a/lib/MogileFS/Server.pm b/lib/MogileFS/Server.pm index 27f6081f..7520e063 100644 --- a/lib/MogileFS/Server.pm +++ b/lib/MogileFS/Server.pm @@ -2,7 +2,7 @@ package MogileFS::Server; use strict; use warnings; use vars qw($VERSION); -$VERSION = "2.37"; +$VERSION = "2.73"; =head1 NAME @@ -15,25 +15,6 @@ MogileFS::Server - MogileFS (distributed filesystem) server =cut -# based on where we found this file (a pure-perl module), -# add the mogdeps/ subdirectory of that base to our @INC search -# path, where all the misc Mogile dependencies are installed. -BEGIN { - my $libpath; - if (! $ENV{MOGILE_NO_BUILTIN_DEPS} && - ($libpath = $INC{"MogileFS/Server.pm"}) && - $libpath =~ s!MogileFS/Server.pm$!!) 
- { - my $dep_dir = "${libpath}mogdeps"; - push @INC, $dep_dir; - unless (($ENV{PERL5LIB} || "") =~ /$dep_dir/) { - $ENV{PERL5LIB} = join(":", - split(/:/, $ENV{PERL5LIB} || ""), - $dep_dir); - } - } -} - use IO::Socket; use Symbol; use POSIX; @@ -44,12 +25,10 @@ use File::Path (); use Sys::Syslog (); use Time::HiRes (); use Net::Netmask; -use LWP::UserAgent; use List::Util; -use Socket (); +use Socket qw(SO_KEEPALIVE IPPROTO_TCP TCP_NODELAY); use MogileFS::Util qw(daemonize); -use MogileFS::Sys; use MogileFS::Config; use MogileFS::ProcManager; @@ -64,16 +43,20 @@ use MogileFS::Worker::Monitor; use MogileFS::Worker::Fsck; use MogileFS::Worker::JobMaster; -use MogileFS::HTTPFile; +use MogileFS::Factory::Domain; +use MogileFS::Factory::Class; +use MogileFS::Factory::Host; +use MogileFS::Factory::Device; +use MogileFS::Domain; use MogileFS::Class; -use MogileFS::Device; use MogileFS::Host; +use MogileFS::Device; + +use MogileFS::HTTPFile; use MogileFS::FID; -use MogileFS::Domain; use MogileFS::DevFID; use MogileFS::Store; -use MogileFS::Store::MySQL; # FIXME: don't load this until after reading their config, but before fork. 
use MogileFS::ReplicationPolicy::MultipleHosts; @@ -99,13 +82,7 @@ sub run { MogileFS::Config->check_database; daemonize() if MogileFS->config("daemonize"); - MogileFS::ProcManager->set_min_workers('queryworker' => MogileFS->config('query_jobs')); - MogileFS::ProcManager->set_min_workers('delete' => MogileFS->config('delete_jobs')); - MogileFS::ProcManager->set_min_workers('replicate' => MogileFS->config('replicate_jobs')); - MogileFS::ProcManager->set_min_workers('reaper' => MogileFS->config('reaper_jobs')); - MogileFS::ProcManager->set_min_workers('monitor' => MogileFS->config('monitor_jobs')); - MogileFS::ProcManager->set_min_workers('fsck' => MogileFS->config('fsck_jobs')); - MogileFS::ProcManager->set_min_workers('job_master' => 1); + MogileFS::ProcManager->set_min_workers('monitor' => 1); # open up our log Sys::Syslog::openlog('mogilefsd', 'pid', 'daemon'); @@ -151,6 +128,8 @@ sub run { Reuse => 1, Listen => 1024 ) or die "Error creating socket: $@\n"; + $server->sockopt(SO_KEEPALIVE, 1); + $server->setsockopt(IPPROTO_TCP, TCP_NODELAY, 1); # save sub to accept a client push @servers, $server; @@ -223,14 +202,37 @@ use strict; use warnings; use MogileFS::Config; use MogileFS::Util qw(error fatal debug); # for others calling Mgd::foo() +use Socket qw(SOL_SOCKET SO_RCVBUF AF_UNIX SOCK_STREAM PF_UNSPEC); +BEGIN { + # detect the receive buffer size for Unix domain stream sockets, + # we assume the size is identical across all Unix domain sockets. + socketpair(my $s1, my $s2, AF_UNIX, SOCK_STREAM, PF_UNSPEC) + or die( "socketpair failed: $!" ); + + my $r = getsockopt($s1, SOL_SOCKET, SO_RCVBUF); + defined $r or die "getsockopt: $!"; + $r = unpack('i', $r) if defined $r; + $r = (defined $r && $r > 0) ? 
$r : 8192; + close $s1; + close $s2; + eval 'use constant UNIX_RCVBUF_SIZE => $r'; +} sub server { return MogileFS::Server->server; } # database checking/connecting -sub validate_dbh { Mgd::get_store()->recheck_dbh } -sub get_dbh { return Mgd::get_store()->dbh } +sub validate_dbh { + my $sto = Mgd::get_store(); + my $had_dbh = $sto->have_dbh; + $sto->recheck_dbh(); + my $dbh; + eval { $dbh = $sto->dbh }; + # Doesn't matter what the failure was; workers should retry later. + error("Error validating master DB: $@") if $@ && $had_dbh; + return $dbh; +} # the eventual replacement for callers asking for a dbh directly: # they'll ask for the current store, which is a database abstraction @@ -258,10 +260,27 @@ sub set_store { $store_pid = $$; } +sub domain_factory { + return MogileFS::Factory::Domain->get_factory; +} + +sub class_factory { + return MogileFS::Factory::Class->get_factory; +} + +sub host_factory { + return MogileFS::Factory::Host->get_factory; +} + +sub device_factory { + return MogileFS::Factory::Device->get_factory; +} + # log stuff to syslog or the screen sub log { # simple logging functionality if (! $MogileFS::Config::daemonize) { + $| = 1; # syslog acts like printf so we have to use printf and append a \n shift; # ignore the first parameter (info, warn, critical, etc) my $mask = shift; # format string diff --git a/lib/MogileFS/Store.pm b/lib/MogileFS/Store.pm index b118c313..c16aec14 100644 --- a/lib/MogileFS/Store.pm +++ b/lib/MogileFS/Store.pm @@ -1,10 +1,10 @@ package MogileFS::Store; use strict; use warnings; -use Carp qw(croak); +use Carp qw(croak confess); use MogileFS::Util qw(throw max error); use DBI; # no reason a Store has to be DBI-based, but for now they all are. -use List::Util (); +use List::Util qw(shuffle); # this is incremented whenever the schema changes. 
server will refuse # to start-up with an old schema version @@ -18,7 +18,11 @@ use List::Util (); # 12: adds 'file_to_delete2' table # 13: modifies 'server_settings.value' to TEXT for wider values # also adds a TEXT 'arg' column to file_to_queue for passing arguments -use constant SCHEMA_VERSION => 13; +# 14: modifies 'device' mb_total, mb_used to INT for devs > 16TB +# 15: adds checksum table, adds 'hashtype' column to 'class' table +# 16: no-op, see 17 +# 17: adds 'readonly' state to enum in host table +use constant SCHEMA_VERSION => 17; sub new { my ($class) = @_; @@ -48,12 +52,15 @@ sub new_from_dsn_user_pass { pass => $pass, max_handles => $max_handles, # Max number of handles to allow raise_errors => $subclass->want_raise_errors, - slave_list_cachetime => 0, + slave_list_version => 0, slave_list_cache => [], recheck_req_gen => 0, # incremented generation, of recheck of dbh being requested recheck_done_gen => 0, # once recheck is done, copy of what the request generation was handles_left => 0, # amount of times this handle can still be verified - server_setting_cache => {}, # value-agnostic db setting cache. + connected_slaves => {}, + dead_slaves => {}, + dead_backoff => {}, # how many times in a row a slave has died + connect_timeout => 10, # High default. }, $subclass; $self->init; return $self; @@ -124,6 +131,7 @@ sub grant_privileges { sub can_replace { 0 } sub can_insertignore { 0 } sub can_insert_multi { 0 } +sub can_for_update { 1 } sub unix_timestamp { die "No function in $_[0] to return DB's unixtime." } @@ -149,10 +157,14 @@ sub raise_errors { $self->dbh->{RaiseError} = 1; } +sub set_connect_timeout { $_[0]{connect_timeout} = $_[1]; } + sub dsn { $_[0]{dsn} } sub user { $_[0]{user} } sub pass { $_[0]{pass} } +sub connect_timeout { $_[0]{connect_timeout} } + sub init { 1 } sub post_dbi_connect { 1 } @@ -162,12 +174,24 @@ sub mark_as_slave { my $self = shift; die "Incapable of becoming slave." 
unless $self->can_do_slaves; - $self->{slave} = 1; + $self->{is_slave} = 1; } sub is_slave { my $self = shift; - return $self->{slave}; + return $self->{is_slave}; +} + +sub _slaves_list_changed { + my $self = shift; + my $ver = MogileFS::Config->server_setting_cached('slave_version') || 0; + if ($ver <= $self->{slave_list_version}) { + return 0; + } + $self->{slave_list_version} = $ver; + # Restart connections from scratch if the configuration changed. + $self->{connected_slaves} = {}; + return 1; } # Returns a list of arrayrefs, each being [$dsn, $username, $password] for connecting to a slave DB. @@ -175,19 +199,12 @@ sub _slaves_list { my $self = shift; my $now = time(); - # only reload every 15 seconds. - if ($self->{slave_list_cachetime} > $now - 15) { - return @{$self->{slave_list_cache}}; - } - $self->{slave_list_cachetime} = $now; - $self->{slave_list_cache} = []; - - my $sk = MogileFS::Config->server_setting('slave_keys') + my $sk = MogileFS::Config->server_setting_cached('slave_keys') or return (); my @ret; foreach my $key (split /\s*,\s*/, $sk) { - my $slave = MogileFS::Config->server_setting("slave_$key"); + my $slave = MogileFS::Config->server_setting_cached("slave_$key"); if (!$slave) { error("key for slave DB config: slave_$key not found in configuration"); @@ -202,30 +219,95 @@ sub _slaves_list { push @ret, [$dsn, $user, $pass] } - $self->{slave_list_cache} = \@ret; return @ret; } +sub _pick_slave { + my $self = shift; + my @temp = shuffle keys %{$self->{connected_slaves}}; + return unless @temp; + return $self->{connected_slaves}->{$temp[0]}; +} + +sub _connect_slave { + my $self = shift; + my $slave_fulldsn = shift; + my $now = time(); + + my $dead_retry = + MogileFS::Config->server_setting_cached('slave_dead_retry_timeout') || 15; + + my $dead_backoff = $self->{dead_backoff}->{$slave_fulldsn->[0]} || 0; + my $dead_timeout = $self->{dead_slaves}->{$slave_fulldsn->[0]}; + return if (defined $dead_timeout + && $dead_timeout + ($dead_retry * 
$dead_backoff) > $now); + return if ($self->{connected_slaves}->{$slave_fulldsn->[0]}); + + my $newslave = $self->{slave} = $self->new_from_dsn_user_pass(@$slave_fulldsn); + $newslave->set_connect_timeout( + MogileFS::Config->server_setting_cached('slave_connect_timeout') || 1); + $self->{slave}->{next_check} = 0; + $newslave->mark_as_slave; + if ($self->check_slave) { + $self->{connected_slaves}->{$slave_fulldsn->[0]} = $newslave; + $self->{dead_backoff}->{$slave_fulldsn->[0]} = 0; + } else { + # Magic numbers are saddening... + $dead_backoff++ unless $dead_backoff > 20; + $self->{dead_slaves}->{$slave_fulldsn->[0]} = $now; + $self->{dead_backoff}->{$slave_fulldsn->[0]} = $dead_backoff; + } +} + sub get_slave { my $self = shift; die "Incapable of having slaves." unless $self->can_do_slaves; - return $self->{slave} if $self->check_slave; + $self->{slave} = undef; + foreach my $slave (keys %{$self->{dead_slaves}}) { + my ($full_dsn) = grep { $slave eq $_->[0] } @{$self->{slave_list_cache}}; + unless ($full_dsn) { + delete $self->{dead_slaves}->{$slave}; + next; + } + $self->_connect_slave($full_dsn); + } + + unless ($self->_slaves_list_changed) { + if ($self->{slave} = $self->_pick_slave) { + $self->{slave}->{recheck_req_gen} = $self->{recheck_req_gen}; + return $self->{slave} if $self->check_slave; + } + } + if ($self->{slave}) { + my $dsn = $self->{slave}->{dsn}; + $self->{dead_slaves}->{$dsn} = time(); + $self->{dead_backoff}->{$dsn} = 0; + delete $self->{connected_slaves}->{$dsn}; + error("Error talking to slave: $dsn"); + } my @slaves_list = $self->_slaves_list; # If we have no slaves, then return silently. 
return unless @slaves_list; + my $slave_skip_filtering = MogileFS::Config->server_setting_cached('slave_skip_filtering'); + + unless (defined $slave_skip_filtering && $slave_skip_filtering eq 'on') { + MogileFS::run_global_hook('slave_list_filter', \@slaves_list); + } + + $self->{slave_list_cache} = \@slaves_list; + foreach my $slave_fulldsn (@slaves_list) { - my $newslave = $self->{slave} = $self->new_from_dsn_user_pass(@$slave_fulldsn); - $self->{slave_next_check} = 0; - $newslave->mark_as_slave; - return $newslave - if $self->check_slave; + $self->_connect_slave($slave_fulldsn); } + if ($self->{slave} = $self->_pick_slave) { + return $self->{slave}; + } warn "Slave list exhausted, failing back to master."; return; } @@ -237,7 +319,6 @@ sub read_store { if ($self->{slave_ok}) { if (my $slave = $self->get_slave) { - $slave->{recheck_req_gen} = $self->{recheck_req_gen}; return $slave; } } @@ -268,25 +349,52 @@ sub dbh { if ($self->{recheck_done_gen} != $self->{recheck_req_gen}) { $self->{dbh} = undef unless $self->{dbh}->ping; # Handles a memory leak under Solaris/Postgres. + # We may leak a little extra memory if we're holding a lock, + # since dropping a connection mid-lock is fatal $self->{dbh} = undef if ($self->{max_handles} && - $self->{handles_left}-- < 0); + $self->{handles_left}-- < 0 && !$self->{lock_depth}); $self->{recheck_done_gen} = $self->{recheck_req_gen}; } return $self->{dbh} if $self->{dbh}; } - $self->{dbh} = DBI->connect($self->{dsn}, $self->{user}, $self->{pass}, { - PrintError => 0, - AutoCommit => 1, - # FUTURE: will default to on (have to validate all callers first): - RaiseError => ($self->{raise_errors} || 0), - }) or + # Shortcut flag: if monitor thinks the master is down, avoid attempting to + # connect to it for now. If we already have a connection to the master, + # keep using it as above. 
+ if (!$self->is_slave) { + my $flag = MogileFS::Config->server_setting_cached('_master_db_alive', 0); + return if (defined $flag && $flag == 0);; + } + + # auto-reconnect is unsafe if we're holding a lock + if ($self->{lock_depth}) { + die "DB connection recovery unsafe, lock held: $self->{last_lock}"; + } + + eval { + local $SIG{ALRM} = sub { die "timeout\n" }; + alarm($self->connect_timeout); + $self->{dbh} = DBI->connect($self->{dsn}, $self->{user}, $self->{pass}, { + PrintError => 0, + AutoCommit => 1, + # FUTURE: will default to on (have to validate all callers first): + RaiseError => ($self->{raise_errors} || 0), + sqlite_use_immediate_transaction => 1, + }); + }; + alarm(0); + if ($@ eq "timeout\n") { + die "Failed to connect to database: timeout"; + } elsif ($@) { die "Failed to connect to database: " . DBI->errstr; + } $self->post_dbi_connect; $self->{handles_left} = $self->{max_handles} if $self->{max_handles}; return $self->{dbh}; } +sub have_dbh { return 1 if $_[0]->{dbh}; } + sub ping { my $self = shift; return $self->dbh->ping; @@ -295,7 +403,7 @@ sub ping { sub condthrow { my ($self, $optmsg) = @_; my $dbh = $self->dbh; - return unless $dbh->err; + return 1 unless $dbh->err; my ($pkg, $fn, $line) = caller; my $msg = "Database error from $pkg/$fn/$line: " . 
$dbh->errstr; $msg .= ": $optmsg" if $optmsg; @@ -340,6 +448,7 @@ sub conddup { my ($self, $code) = @_; my $rv = eval { $code->(); }; throw("dup") if $self->was_duplicate_error; + croak($@) if $@; return $rv; } @@ -380,9 +489,7 @@ sub retry_on_deadlock { while ($tries-- > 0) { $rv = eval { $code->(); }; next if ($self->was_deadlock_error); - if ($@) { - croak($@) unless $self->dbh->err; - } + croak($@) if $@; last; } return $rv; @@ -401,7 +508,7 @@ use constant TABLES => qw( domain class file tempfile file_to_delete unreachable_fids file_on file_on_corrupt host device server_settings file_to_replicate file_to_delete_later fsck_log file_to_queue - file_to_delete2 ); + file_to_delete2 checksum); sub setup_database { my $sto = shift; @@ -435,6 +542,9 @@ sub setup_database { $sto->upgrade_add_class_replpolicy; $sto->upgrade_modify_server_settings_value; $sto->upgrade_add_file_to_queue_arg; + $sto->upgrade_modify_device_size; + $sto->upgrade_add_class_hashtype; + $sto->upgrade_add_host_readonly; return 1; } @@ -501,7 +611,8 @@ sub TABLE_class { PRIMARY KEY (dmid,classid), classname VARCHAR(50), UNIQUE (dmid,classname), - mindevcount TINYINT UNSIGNED NOT NULL + mindevcount TINYINT UNSIGNED NOT NULL, + hashtype TINYINT UNSIGNED )" } @@ -625,8 +736,8 @@ sub TABLE_device { status ENUM('alive','dead','down'), weight MEDIUMINT DEFAULT 100, - mb_total MEDIUMINT UNSIGNED, - mb_used MEDIUMINT UNSIGNED, + mb_total INT UNSIGNED, + mb_used INT UNSIGNED, mb_asof INT UNSIGNED, PRIMARY KEY (devid), INDEX (status) @@ -707,6 +818,14 @@ sub TABLE_file_to_delete2 { )" } +sub TABLE_checksum { + "CREATE TABLE checksum ( + fid INT UNSIGNED NOT NULL PRIMARY KEY, + hashtype TINYINT UNSIGNED NOT NULL, + checksum VARBINARY(64) NOT NULL + )" +} + # these five only necessary for MySQL, since no other database existed # before, so they can just create the tables correctly to begin with. 
# in the future, there might be new alters that non-MySQL databases @@ -719,6 +838,7 @@ sub upgrade_add_device_readonly { 1 } sub upgrade_add_device_drain { die "Not implemented in $_[0]" } sub upgrade_modify_server_settings_value { die "Not implemented in $_[0]" } sub upgrade_add_file_to_queue_arg { die "Not implemented in $_[0]" } +sub upgrade_modify_device_size { die "Not implemented in $_[0]" } sub upgrade_add_class_replpolicy { my ($self) = @_; @@ -727,6 +847,13 @@ sub upgrade_add_class_replpolicy { } } +sub upgrade_add_class_hashtype { + my ($self) = @_; + unless ($self->column_type("class", "hashtype")) { + $self->dowell("ALTER TABLE class ADD COLUMN hashtype TINYINT UNSIGNED"); + } +} + # return true if deleted, 0 if didn't exist, exception if error sub delete_host { my ($self, $hostid) = @_; @@ -736,7 +863,27 @@ sub delete_host { # return true if deleted, 0 if didn't exist, exception if error sub delete_domain { my ($self, $dmid) = @_; - return $self->dbh->do("DELETE FROM domain WHERE dmid = ?", undef, $dmid); + my ($err, $rv); + my $dbh = $self->dbh; + eval { + $dbh->begin_work; + if ($self->domain_has_files($dmid)) { + $err = "has_files"; + } elsif ($self->domain_has_classes($dmid)) { + $err = "has_classes"; + } else { + $rv = $dbh->do("DELETE FROM domain WHERE dmid = ?", undef, $dmid); + + # remove the "default" class if one was created (for mindevcount) + # this is currently the only way to delete the "default" class + $dbh->do("DELETE FROM class WHERE dmid = ? AND classid = 0", undef, $dmid); + $dbh->commit; + } + $dbh->rollback if $err; + }; + $self->condthrow; # will rollback on errors + throw($err) if $err; + return $rv; } sub domain_has_files { @@ -746,6 +893,15 @@ sub domain_has_files { return $has_a_fid ? 
1 : 0; } +sub domain_has_classes { + my ($self, $dmid) = @_; + # queryworker does not permit removing default class, so domain_has_classes + # should not register the default class + my $has_a_class = $self->dbh->selectrow_array('SELECT classid FROM class WHERE dmid = ? AND classid != 0 LIMIT 1', + undef, $dmid); + return defined($has_a_class); +} + sub class_has_files { my ($self, $dmid, $clid) = @_; my $has_a_fid = $self->dbh->selectrow_array('SELECT fid FROM file WHERE dmid = ? AND classid = ? LIMIT 1', @@ -755,27 +911,36 @@ sub class_has_files { # return new classid on success (non-zero integer), die on failure # throw 'dup' on duplicate name -# override this if you want a less racy version. sub create_class { my ($self, $dmid, $classname) = @_; my $dbh = $self->dbh; - # get the max class id in this domain - my $maxid = $dbh->selectrow_array - ('SELECT MAX(classid) FROM class WHERE dmid = ?', undef, $dmid) || 0; + my ($clsid, $rv); - # now insert the new class - my $rv = eval { - $dbh->do("INSERT INTO class (dmid, classid, classname, mindevcount) VALUES (?, ?, ?, ?)", - undef, $dmid, $maxid + 1, $classname, 2); + eval { + $dbh->begin_work; + if ($classname eq 'default') { + $clsid = 0; + } else { + # get the max class id in this domain + my $maxid = $dbh->selectrow_array + ('SELECT MAX(classid) FROM class WHERE dmid = ?', undef, $dmid) || 0; + $clsid = $maxid + 1; + } + # now insert the new class + $rv = $dbh->do("INSERT INTO class (dmid, classid, classname, mindevcount) VALUES (?, ?, ?, ?)", + undef, $dmid, $clsid, $classname, 2); + $dbh->commit if $rv; }; if ($@ || $dbh->err) { if ($self->was_duplicate_error) { + # ensure we're not inside a transaction + if ($dbh->{AutoCommit} == 0) { eval { $dbh->rollback }; } throw("dup"); } } - return $maxid + 1 if $rv; - $self->condthrow; + $self->condthrow; # this will rollback on errors + return $clsid if $rv; die; } @@ -816,6 +981,17 @@ sub update_class_replpolicy { return 1; } +# return 1 on success, die otherwise 
+sub update_class_hashtype { + my $self = shift; + my %arg = $self->_valid_params([qw(dmid classid hashtype)], @_); + eval { + $self->dbh->do("UPDATE class SET hashtype=? WHERE dmid=? AND classid=?", + undef, $arg{hashtype}, $arg{dmid}, $arg{classid}); + }; + $self->condthrow; +} + sub nfiles_with_dmid_classid_devcount { my ($self, $dmid, $classid, $devcount) = @_; return $self->dbh->selectrow_array('SELECT COUNT(*) FROM file WHERE dmid = ? AND classid = ? AND devcount = ?', @@ -858,21 +1034,6 @@ sub server_setting { undef, $key); } -# generic server setting cache. -# note that you can call the same server setting with different timeouts, but -# the timeout specified at the time of ... timeout, wins. -sub server_setting_cached { - my ($self, $key, $timeout) = @_; - $self->{server_setting_cache}->{$key} ||= {val => '', refresh => 0}; - my $cache = $self->{server_setting_cache}->{$key}; - my $now = time(); - if ($now > $cache->{refresh}) { - $cache->{val} = $self->server_setting($key); - $cache->{refresh} = $now + $timeout; - } - return $cache->{val}; -} - sub server_settings { my ($self) = @_; my $ret = {}; @@ -952,8 +1113,11 @@ sub register_tempfile { return $exists ? 1 : 0; }; + # See notes in MogileFS::Config->check_database + my $min_fidid = MogileFS::Config->config('min_fidid'); + # if the fid is in use, do something - while ($fid_in_use->($fid)) { + while ($fid_in_use->($fid) || $fid <= $min_fidid) { throw("dup") if $explicit_fid_used; # be careful of databases which reset their @@ -996,10 +1160,10 @@ sub file_row_from_fidid { # return an arrayref of rows containing columns "fid, dmid, dkey, length, # classid, devcount" provided a pair of $fidid or undef if no rows. sub file_row_from_fidid_range { - my ($self, $fromfid, $tofid) = @_; + my ($self, $fromfid, $count) = @_; my $sth = $self->dbh->prepare("SELECT fid, dmid, dkey, length, classid, devcount ". - "FROM file WHERE fid BETWEEN ? AND ?"); - $sth->execute($fromfid,$tofid); + "FROM file WHERE fid > ? 
LIMIT ?"); + $sth->execute($fromfid,$count); return $sth->fetchall_arrayref({}); } @@ -1026,7 +1190,7 @@ sub fid_devids_multiple { # return hashref of columns classid, dmid, dkey, given a $fidid, or return undef sub tempfile_row_from_fid { my ($self, $fidid) = @_; - return $self->dbh->selectrow_hashref("SELECT classid, dmid, dkey ". + return $self->dbh->selectrow_hashref("SELECT classid, dmid, dkey, devids ". "FROM tempfile WHERE fid=?", undef, $fidid); } @@ -1043,21 +1207,51 @@ sub create_device { return 1; } +sub update_device { + my ($self, $devid, $to_update) = @_; + my @keys = sort keys %$to_update; + return unless @keys; + $self->conddup(sub { + $self->dbh->do("UPDATE device SET " . join('=?, ', @keys) + . "=? WHERE devid=?", undef, (map { $to_update->{$_} } @keys), + $devid); + }); + return 1; +} + sub update_device_usage { my $self = shift; - my %arg = $self->_valid_params([qw(mb_total mb_used devid)], @_); + my %arg = $self->_valid_params([qw(mb_total mb_used devid mb_asof)], @_); eval { - $self->dbh->do("UPDATE device SET mb_total = ?, mb_used = ?, mb_asof = " . $self->unix_timestamp . - " WHERE devid = ?", undef, $arg{mb_total}, $arg{mb_used}, $arg{devid}); + $self->dbh->do("UPDATE device SET ". + "mb_total = ?, mb_used = ?, mb_asof = ?" . + " WHERE devid = ?", + undef, $arg{mb_total}, $arg{mb_used}, $arg{mb_asof}, + $arg{devid}); }; $self->condthrow; } -sub mark_fidid_unreachable { - my ($self, $fidid) = @_; - die "Your database does not support REPLACE! Reimplement mark_fidid_unreachable!" unless $self->can_replace; - $self->dbh->do("REPLACE INTO unreachable_fids VALUES (?, " . $self->unix_timestamp . 
")", - undef, $fidid); +# MySQL has an optimized version +sub update_device_usages { + my ($self, $updates, $cb) = @_; + foreach my $upd (@$updates) { + $self->update_device_usage(%$upd); + $cb->(); + } +} + +# This is unimplemented at the moment as we must verify: +# - no file_on rows exist +# - nothing in file_to_queue is going to attempt to use it +# - nothing in file_to_replicate is going to attempt to use it +# - it's already been marked dead +# - that all trackers are likely to know this :/ +# - ensure the devid can't be reused +# IE; the user can't mark it dead then remove it all at once and cause their +# cluster to implode. +sub delete_device { + die "Unimplemented; needs further testing"; } sub set_device_weight { @@ -1078,22 +1272,33 @@ sub set_device_state { sub delete_class { my ($self, $dmid, $cid) = @_; + throw("has_files") if $self->class_has_files($dmid, $cid); eval { $self->dbh->do("DELETE FROM class WHERE dmid = ? AND classid = ?", undef, $dmid, $cid); }; $self->condthrow; } +# called from a queryworker process, will trigger delete_fidid_enqueued +# in the delete worker sub delete_fidid { my ($self, $fidid) = @_; eval { $self->dbh->do("DELETE FROM file WHERE fid=?", undef, $fidid); }; $self->condthrow; - eval { $self->dbh->do("DELETE FROM tempfile WHERE fid=?", undef, $fidid); }; - $self->condthrow; $self->enqueue_for_delete2($fidid, 0); $self->condthrow; } +# Only called from delete workers (after delete_fidid), +# this reduces client-visible latency from the queryworker +sub delete_fidid_enqueued { + my ($self, $fidid) = @_; + eval { $self->delete_checksum($fidid); }; + $self->condthrow; + eval { $self->dbh->do("DELETE FROM tempfile WHERE fid=?", undef, $fidid); }; + $self->condthrow; +} + sub delete_tempfile_row { my ($self, $fidid) = @_; my $rv = eval { $self->dbh->do("DELETE FROM tempfile WHERE fid=?", undef, $fidid); }; @@ -1112,12 +1317,12 @@ sub delete_and_return_tempfile_row { sub replace_into_file { my $self = shift; - my %arg = 
$self->_valid_params([qw(fidid dmid key length classid)], @_); + my %arg = $self->_valid_params([qw(fidid dmid key length classid devcount)], @_); die "Your database does not support REPLACE! Reimplement replace_into_file!" unless $self->can_replace; eval { $self->dbh->do("REPLACE INTO file (fid, dmid, dkey, length, classid, devcount) ". - "VALUES (?,?,?,?,?,0) ", undef, - @arg{'fidid', 'dmid', 'key', 'length', 'classid'}); + "VALUES (?,?,?,?,?,?) ", undef, + @arg{'fidid', 'dmid', 'key', 'length', 'classid', 'devcount'}); }; $self->condthrow; } @@ -1144,6 +1349,13 @@ sub rename_file { return 1; } +sub get_domainid_by_name { + my $self = shift; + my ($dmid) = $self->dbh->selectrow_array('SELECT dmid FROM domain WHERE namespace = ?', + undef, $_[0]); + return $dmid; +} + # returns a hash of domains. Key is namespace, value is dmid. sub get_all_domains { my ($self) = @_; @@ -1151,17 +1363,27 @@ sub get_all_domains { return map { ($_->[0], $_->[1]) } @{$domains || []}; } +sub get_classid_by_name { + my $self = shift; + my ($classid) = $self->dbh->selectrow_array('SELECT classid FROM class WHERE dmid = ? AND classname = ?', + undef, $_[0], $_[1]); + return $classid; +} + # returns an array of hashrefs, one hashref per row in the 'class' table sub get_all_classes { my ($self) = @_; my (@ret, $row); - my $repl_col = ""; + my @cols = qw/dmid classid classname mindevcount/; if ($self->cached_schema_version >= 10) { - $repl_col = ", replpolicy"; + push @cols, 'replpolicy'; + if ($self->cached_schema_version >= 15) { + push @cols, 'hashtype'; + } } - - my $sth = $self->dbh->prepare("SELECT dmid, classid, classname, mindevcount $repl_col FROM class"); + my $cols = join(', ', @cols); + my $sth = $self->dbh->prepare("SELECT $cols FROM class"); $sth->execute; push @ret, $row while $row = $sth->fetchrow_hashref; return @ret; @@ -1193,6 +1415,21 @@ sub remove_fidid_from_devid { return $rv; } +# Test if host exists. 
+sub get_hostid_by_id { + my $self = shift; + my ($hostid) = $self->dbh->selectrow_array('SELECT hostid FROM host WHERE hostid = ?', + undef, $_[0]); + return $hostid; +} + +sub get_hostid_by_name { + my $self = shift; + my ($hostid) = $self->dbh->selectrow_array('SELECT hostid FROM host WHERE hostname = ?', + undef, $_[0]); + return $hostid; +} + # get all hosts from database, returns them as list of hashrefs, hashrefs being the row contents. sub get_all_hosts { my ($self) = @_; @@ -1250,8 +1487,10 @@ sub update_classid { sub enqueue_for_replication { my ($self, $fidid, $from_devid, $in) = @_; - $in = 0 unless $in; - my $nexttry = $self->unix_timestamp . " + " . int($in); + my $nexttry = 0; + if ($in) { + $nexttry = $self->unix_timestamp . " + " . int($in); + } $self->retry_on_deadlock(sub { $self->insert_ignore("INTO file_to_replicate (fid, fromdevid, nexttry) ". @@ -1297,7 +1536,7 @@ sub enqueue_for_todo { # return 1 on success. die otherwise. sub enqueue_many_for_todo { my ($self, $fidids, $type, $in) = @_; - if (@$fidids > 1 && ! ($self->can_insert_multi && ($self->can_replace || $self->can_insertignore))) { + if (! ($self->can_insert_multi && ($self->can_replace || $self->can_insertignore))) { $self->enqueue_for_todo($_, $type, $in) foreach @$fidids; return 1; } @@ -1315,7 +1554,7 @@ sub enqueue_many_for_todo { } else { $self->dbh->do($self->ignore_replace . " INTO file_to_queue (fid, type, nexttry) VALUES " . - join(",", map { "(" . int($_->{fid}) . ", $type, $nexttry)" } @$fidids)); + join(",", map { "(" . int($_) . ", $type, $nexttry)" } @$fidids)); } }); $self->condthrow; @@ -1389,36 +1628,16 @@ sub get_fidid_chunks_by_device { return $fidids; } -# takes two arguments, fidid to be above, and optional limit (default -# 1,000). returns up to that that many fidids above the provided -# fidid. returns array of MogileFS::FID objects, sorted by fid ids. 
-sub get_fids_above_id { - my ($self, $fidid, $limit) = @_; - $limit ||= 1000; - $limit = int($limit); - - my @ret; - my $dbh = $self->dbh; - my $sth = $dbh->prepare("SELECT fid, dmid, dkey, length, classid, devcount ". - "FROM file ". - "WHERE fid > ? ". - "ORDER BY fid LIMIT $limit"); - $sth->execute($fidid); - while (my $row = $sth->fetchrow_hashref) { - push @ret, MogileFS::FID->new_from_db_row($row); - } - return @ret; -} - -# Same as above, but returns unblessed hashref. -sub get_fidids_above_id { - my ($self, $fidid, $limit) = @_; +# gets fidids above fidid_low up to (and including) fidid_high +sub get_fidids_between { + my ($self, $fidid_low, $fidid_high, $limit) = @_; $limit ||= 1000; $limit = int($limit); my $dbh = $self->dbh; - my $fidids = $dbh->selectcol_arrayref(qq{SELECT fid FROM file WHERE fid > ? - ORDER BY fid LIMIT $limit}); + my $fidids = $dbh->selectcol_arrayref(qq{SELECT fid FROM file + WHERE fid > ? and fid <= ? + ORDER BY fid LIMIT $limit}, undef, $fidid_low, $fidid_high); return $fidids; } @@ -1442,10 +1661,14 @@ sub create_domain { die "failed to make domain"; # FIXME: the above is racy. } -sub update_host_property { - my ($self, $hostid, $col, $val) = @_; +sub update_host { + my ($self, $hid, $to_update) = @_; + my @keys = sort keys %$to_update; + return unless @keys; $self->conddup(sub { - $self->dbh->do("UPDATE host SET $col=? WHERE hostid=?", undef, $val, $hostid); + $self->dbh->do("UPDATE host SET " . join('=?, ', @keys) + . "=? WHERE hostid=?", undef, (map { $to_update->{$_} } @keys), + $hid); }); return 1; } @@ -1500,21 +1723,24 @@ sub grab_queue_chunk { my $tries = 3; my $work; + return 0 unless $self->lock_queue($queue); + my $extwhere = shift || ''; my $fields = 'fid, nexttry, failcount'; $fields .= ', ' . 
$extfields if $extfields; eval { $dbh->begin_work; my $ut = $self->unix_timestamp; - my $sth = $dbh->prepare(qq{ + my $query = qq{ SELECT $fields FROM $queue WHERE nexttry <= $ut $extwhere ORDER BY nexttry LIMIT $limit - FOR UPDATE - }); + }; + $query .= "FOR UPDATE\n" if $self->can_for_update; + my $sth = $dbh->prepare($query); $sth->execute; $work = $sth->fetchall_hashref('fid'); # Nothing to work on. @@ -1527,9 +1753,16 @@ sub grab_queue_chunk { }; if ($self->was_deadlock_error) { eval { $dbh->rollback }; - return (); + $work = undef; + } else { + $self->condthrow; } - $self->condthrow; + # FIXME: Super extra paranoia to prevent deadlocking. + # Need to handle or die on all errors above, but $@ can get reset. For now + # we'll just always ensure there's no transaction running at the end here. + # A (near) release should figure the error detection correctly. + if ($dbh->{AutoCommit} == 0) { eval { $dbh->rollback }; } + $self->unlock_queue($queue); return defined $work ? values %$work : (); } @@ -1561,8 +1794,9 @@ sub grab_files_to_queued { # and tell it not to. sub should_begin_replicating_fidid { my ($self, $fidid) = @_; - warn("Inefficient implementation of should_begin_replicating_fidid() in $self!\n"); - 1; + my $lockname = "mgfs:fid:$fidid:replicate"; + return 1 if $self->get_lock($lockname, 1); + return 0; } # called when replicator is done replicating a fid, so you can cleanup @@ -1576,6 +1810,26 @@ sub should_begin_replicating_fidid { # locking in this pair of functions. 
sub note_done_replicating { my ($self, $fidid) = @_; + my $lockname = "mgfs:fid:$fidid:replicate"; + $self->release_lock($lockname); +} + +sub find_fid_from_file_to_replicate { + my ($self, $fidid) = @_; + return $self->dbh->selectrow_hashref("SELECT fid, nexttry, fromdevid, failcount, flags FROM file_to_replicate WHERE fid = ?", + undef, $fidid); +} + +sub find_fid_from_file_to_delete2 { + my ($self, $fidid) = @_; + return $self->dbh->selectrow_hashref("SELECT fid, nexttry, failcount FROM file_to_delete2 WHERE fid = ?", + undef, $fidid); +} + +sub find_fid_from_file_to_queue { + my ($self, $fidid, $type) = @_; + return $self->dbh->selectrow_hashref("SELECT fid, devid, type, nexttry, failcount, flags, arg FROM file_to_queue WHERE fid = ? AND type = ?", + undef, $fidid, $type); } sub delete_fid_from_file_to_replicate { @@ -1641,15 +1895,23 @@ sub get_keys_like { # fix the input... prefix always ends with a % so that it works # in a LIKE call, and after is either blank or something $prefix = '' unless defined $prefix; + + # escape underscores, % and \ + $prefix =~ s/([%\\_])/\\$1/g; + $prefix .= '%'; $after = '' unless defined $after; + my $like = $self->get_keys_like_operator; + # now select out our keys return $self->dbh->selectcol_arrayref - ('SELECT dkey FROM file WHERE dmid = ? AND dkey LIKE ? AND dkey > ? ' . - "ORDER BY dkey LIMIT $limit", undef, $dmid, $prefix, $after); + ("SELECT dkey FROM file WHERE dmid = ? AND dkey $like ? ESCAPE ? AND dkey > ? " . + "ORDER BY dkey LIMIT $limit", undef, $dmid, $prefix, "\\", $after); } +sub get_keys_like_operator { return "LIKE"; } + # return arrayref of all tempfile rows (themselves also arrayrefs, of [$fidid, $devids]) # that were created $secs_ago seconds ago or older. sub old_tempfiles { @@ -1856,7 +2118,37 @@ sub fsck_evcode_counts { # run before daemonizing. you can die from here if you see something's amiss. or emit # warnings. 
-sub pre_daemonize_checks { } +sub pre_daemonize_checks { + my $self = shift; + + $self->pre_daemonize_check_slaves; +} + +sub pre_daemonize_check_slaves { + my $sk = MogileFS::Config->server_setting('slave_keys') + or return; + + my @slaves; + foreach my $key (split /\s*,\s*/, $sk) { + my $slave = MogileFS::Config->server_setting("slave_$key"); + + if (!$slave) { + error("key for slave DB config: slave_$key not found in configuration"); + next; + } + + my ($dsn, $user, $pass) = split /\|/, $slave; + if (!defined($dsn) or !defined($user) or !defined($pass)) { + error("key slave_$key contains $slave, which doesn't split in | into DSN|user|pass - ignoring"); + next; + } + push @slaves, [$dsn, $user, $pass] + } + + return unless @slaves; # Escape this block if we don't have a set of slaves anyways + + MogileFS::run_global_hook('slave_list_check', \@slaves); +} # attempt to grab a lock of lockname, and timeout after timeout seconds. @@ -1874,23 +2166,57 @@ sub release_lock { die "release_lock not implemented for $self"; } -# returns up to $limit @fidids which are on provided $devid -sub random_fids_on_device { - my ($self, $devid, $limit) = @_; - $limit = int($limit) || 100; +# MySQL has an issue where you either get excessive deadlocks, or INSERT's +# hang forever around some transactions. Use ghetto locking to cope. +sub lock_queue { 1 } +sub unlock_queue { 1 } + +sub BLOB_BIND_TYPE { undef; } +sub set_checksum { + my ($self, $fidid, $hashtype, $checksum) = @_; my $dbh = $self->dbh; + die "Your database does not support REPLACE! Reimplement set_checksum!" unless $self->can_replace; + + eval { + my $sth = $dbh->prepare("REPLACE INTO checksum " . + "(fid, hashtype, checksum) " . + "VALUES (?, ?, ?)"); + $sth->bind_param(1, $fidid); + $sth->bind_param(2, $hashtype); + $sth->bind_param(3, $checksum, BLOB_BIND_TYPE); + $sth->execute; + }; + $self->condthrow; +} - # FIXME: this blows. not random. 
and good chances these will - # eventually get to point where they're un-rebalance-able, and we - # never move on past the first 5000 - my @some_fids = List::Util::shuffle(@{ - $dbh->selectcol_arrayref("SELECT fid FROM file_on WHERE devid=? LIMIT 5000", - undef, $devid) || [] - }); +sub get_checksum { + my ($self, $fidid) = @_; + + $self->dbh->selectrow_hashref("SELECT fid, hashtype, checksum " . + "FROM checksum WHERE fid = ?", + undef, $fidid); +} + +sub delete_checksum { + my ($self, $fidid) = @_; + + $self->dbh->do("DELETE FROM checksum WHERE fid = ?", undef, $fidid); +} + +# setup the value used in a 'nexttry' field to indicate that this item will +# never actually be tried again and require some sort of manual intervention. +use constant ENDOFTIME => 2147483647; + +sub end_of_time { ENDOFTIME; } + +# returns the size of the non-urgent replication queue +# nexttry == 0 - the file is urgent +# nexttry != 0 && nexttry < ENDOFTIME - the file is deferred +sub deferred_repl_queue_length { + my ($self) = @_; - @some_fids = @some_fids[0..$limit-1] if $limit < @some_fids; - return @some_fids; + return $self->dbh->selectrow_array('SELECT COUNT(*) FROM file_to_replicate WHERE nexttry != 0 AND nexttry < ?', undef, $self->end_of_time); } 1; diff --git a/lib/MogileFS/Store/MySQL.pm b/lib/MogileFS/Store/MySQL.pm index 14ec8911..c0f5a3dc 100644 --- a/lib/MogileFS/Store/MySQL.pm +++ b/lib/MogileFS/Store/MySQL.pm @@ -82,21 +82,22 @@ sub check_slave { return 0 unless $self->{slave}; - my $next_check = \$self->{slave_next_check}; + my $next_check = \$self->{slave}->{next_check}; if ($$next_check > time()) { return 1; } - my $master_status = eval { $self->dbh->selectrow_hashref("SHOW MASTER STATUS") }; - warn "Error thrown: '$@' while trying to get master status." if $@; + #my $slave_status = eval { $self->{slave}->dbh->selectrow_hashref("SHOW SLAVE STATUS") }; + #warn "Error thrown: '$@' while trying to get slave status." 
if $@; - my $slave_status = eval { $self->{slave}->dbh->selectrow_hashref("SHOW SLAVE STATUS") }; - warn "Error thrown: '$@' while trying to get slave status." if $@; - - # compare contrast, return 0 if not okay. - # Master: File Position - # Slave: + # TODO: Check show slave status *unless* a server setting is present to + # tell us to ignore it (like in a multi-DC setup). + eval { $self->{slave}->dbh }; + if ($@) { + warn "Error while checking slave: $@"; + return 0; + } # call time() again here because SQL blocks. $$next_check = time() + 5; @@ -127,6 +128,18 @@ sub release_lock { return $rv; } +sub lock_queue { + my ($self, $type) = @_; + my $lock = $self->get_lock('mfsd:' . $type, 30); + return $lock ? 1 : 0; +} + +sub unlock_queue { + my ($self, $type) = @_; + my $lock = $self->release_lock('mfsd:' . $type); + return $lock ? 1 : 0; +} + # clears everything from the fsck_log table # return 1 on success. die otherwise. # Under MySQL 4.1+ this is actually fast. @@ -191,9 +204,11 @@ sub new_temp { my $port = $args{dbport} || 3306; my $user = $args{dbuser} || 'root'; my $pass = $args{dbpass} || ''; + my $rootuser = $args{dbrootuser} || $args{dbuser} || 'root'; + my $rootpass = $args{dbrootpass} || $args{dbpass} || ''; my $sto = MogileFS::Store->new_from_dsn_user_pass("DBI:mysql:database=$dbname;host=$host;port=$port", - $user, $pass); + $rootuser, $rootpass); my $dbh = $sto->dbh; _create_mysql_db($dbh, $dbname); @@ -201,10 +216,22 @@ sub new_temp { # allow MyISAM in the test suite. 
$ENV{USE_UNSAFE_MYSQL} = 1 unless defined $ENV{USE_UNSAFE_MYSQL}; - system("$FindBin::Bin/../mogdbsetup", "--yes", "--dbname=$dbname", - "--dbhost=$host", "--dbport=$port", "--dbrootuser=$user", - "--dbrootpass=$pass", "--dbuser=$user", "--dbpass=$pass") - and die "Failed to run mogdbsetup ($FindBin::Bin/../mogdbsetup)."; + my @args = ("$FindBin::Bin/../mogdbsetup", "--yes", + "--dbname=$dbname", "--type=MySQL", + "--dbhost=$host", "--dbport=$port", + "--dbrootuser=$rootuser", + "--dbuser=$user", ); + push @args, "--dbpass=$pass" unless $pass eq ''; + push @args, "--dbrootpass=$rootpass" unless $rootpass eq ''; + system(@args) + and die "Failed to run mogdbsetup (".join(' ',map { "'".$_."'" } @args).")."; + + if($user ne $rootuser) { + $sto = MogileFS::Store->new_from_dsn_user_pass( + "DBI:mysql:database=$dbname;host=$host;port=$port", + $user, $pass); + $dbh = $sto->dbh; + } $dbh->do("use $dbname"); return $sto; @@ -257,7 +284,7 @@ sub create_table { # don't alter an existing table up to InnoDB from MyISAM... # could be costly. but on new tables, no problem... unless ($existed) { - $dbh->do("ALTER TABLE $table TYPE=InnoDB"); + $dbh->do("ALTER TABLE $table ENGINE=InnoDB"); warn "DBI reported an error of: '" . $dbh->errstr . "' when trying to " . 
"alter table type of $table to InnoDB\n" if $dbh->err; } @@ -300,19 +327,6 @@ sub update_devcount_atomic { return 1; } -sub should_begin_replicating_fidid { - my ($self, $fidid) = @_; - my $lockname = "mgfs:fid:$fidid:replicate"; - return 1 if $self->get_lock($lockname, 1); - return 0; -} - -sub note_done_replicating { - my ($self, $fidid) = @_; - my $lockname = "mgfs:fid:$fidid:replicate"; - $self->release_lock($lockname); -} - sub upgrade_add_host_getport { my $self = shift; # see if they have the get port, else update it @@ -373,6 +387,22 @@ sub upgrade_add_file_to_queue_arg { } } +sub upgrade_modify_device_size { + my $self = shift; + for my $col ('mb_total', 'mb_used') { + if ($self->column_type("device", $col) =~ m/mediumint/i) { + $self->dowell("ALTER TABLE device MODIFY COLUMN $col INT UNSIGNED"); + } + } +} + +sub upgrade_add_host_readonly { + my $self = shift; + unless ($self->column_type("host", "status") =~ /\breadonly\b/) { + $self->dowell("ALTER TABLE host MODIFY COLUMN status ENUM('alive', 'dead', 'down', 'readonly')"); + } +} + sub pre_daemonize_checks { my $self = shift; # Jay Buffington, from the mailing lists, writes: @@ -406,6 +436,37 @@ sub pre_daemonize_checks { die "MySQL self-tests failed. Your DBD::mysql might've been built against an old DBI version.\n"; } + return $self->SUPER::pre_daemonize_checks(); +} + +sub get_keys_like_operator { + my $bool = MogileFS::Config->server_setting_cached('case_sensitive_list_keys'); + return $bool ? "LIKE /*! BINARY */" : "LIKE"; +} + +sub update_device_usages { + my ($self, $updates, $cb) = @_; + $cb->(); + my $chunk = 10000; # in case we hit max_allowed_packet size(!) 
+ while (scalar @$updates) { + my @cur = splice(@$updates, 0, $chunk); + my @set; + foreach my $fld (qw(mb_total mb_used mb_asof)) { + my $s = "$fld = CASE devid\n"; + foreach my $upd (@cur) { + my $devid = $upd->{devid}; + defined($devid) or croak("devid not set\n"); + my $val = $upd->{$fld}; + defined($val) or croak("$fld not defined for $devid\n"); + $s .= "WHEN $devid THEN $val\n"; + } + $s .= "ELSE $fld END"; + push @set, $s; + } + my $sql = "UPDATE device SET ". join(",\n", @set); + $self->dowell($sql); + $cb->(); + } } 1; diff --git a/lib/MogileFS/Store/Postgres.pm b/lib/MogileFS/Store/Postgres.pm index 56cf5da0..91e39c4e 100644 --- a/lib/MogileFS/Store/Postgres.pm +++ b/lib/MogileFS/Store/Postgres.pm @@ -34,7 +34,7 @@ sub want_raise_errors { 1 } # if it it's made, or already exists. die otherwise. sub create_db_if_not_exists { my ($pkg, $rdbh, $dbname) = @_; - if(not $rdbh->do("CREATE DATABASE $dbname")) { + if(not $rdbh->do("CREATE DATABASE $dbname TEMPLATE template0 ENCODING 'UTF-8'" )) { die "Failed to create database '$dbname': " . $rdbh->errstr . "\n" if ($rdbh->errstr !~ /already exists/); } } @@ -62,7 +62,8 @@ sub init { $self->SUPER::init; my $database_version = $self->dbh->get_info(18); # SQL_DBMS_VER # We need >=pg-8.2 because we use SAVEPOINT and ROLLBACK TO. - die "Postgres is too old! Must use >=postgresql-8.2!" if($database_version =~ /\A0[0-7]\.|08\.0[01]/); + # We need >=pg-8.4 for working advisory locks + die "Postgres is too old! Must use >=postgresql-8.4!" 
if($database_version =~ /\A0[0-7]\.|08\.0[0123]/); $self->{lock_depth} = 0; } @@ -110,6 +111,7 @@ sub setup_database { sub filter_create_sql { my ($self, $sql) = @_; $sql =~ s/\bUNSIGNED\b//g; + $sql =~ s/\bVARBINARY\(\d+\)/bytea/g; $sql =~ s/\b(?:TINY|MEDIUM)INT\b/SMALLINT/g; $sql =~ s/\bINT\s+NOT\s+NULL\s+AUTO_INCREMENT\b/SERIAL/g; $sql =~ s/# /-- /g; @@ -290,10 +292,22 @@ sub upgrade_add_device_drain { } } +sub upgrade_add_host_readonly { + my $self = shift; + my $cn; + unless ($self->column_constraint("host", "status", \$cn) =~ /\breadonly\b/) { + $self->dbh->begin_work; + $self->dowell("ALTER TABLE host DROP CONSTRAINT $cn"); + $self->dowell("ALTER TABLE host ADD CONSTRAINT status CHECK(". + "status IN ('alive', 'dead', 'down', 'readonly'))"); + $self->dbh->commit; + } +} + sub upgrade_modify_server_settings_value { my $self = shift; unless ($self->column_type("server_settings", "value" =~ /text/i)) { - $self->dowell("ALTER TABLE server_settings MODIFY COLUMN value TEXT"); + $self->dowell("ALTER TABLE server_settings ALTER COLUMN value TYPE TEXT"); } } @@ -304,28 +318,83 @@ sub upgrade_add_file_to_queue_arg { } } +# Postgres doesn't have or never used a MEDIUMINT for device. +sub upgrade_modify_device_size { + return 1; +} + +sub upgrade_add_class_hashtype { + my ($self) = @_; + unless ($self->column_type("class", "hashtype")) { + $self->dowell("ALTER TABLE class ADD COLUMN hashtype SMALLINT"); + } +} + # return 1 on success. die otherwise. sub enqueue_fids_to_delete { + # My kingdom for a real INSERT IGNORE implementation! 
my ($self, @fidids) = @_; my $sql = "INSERT INTO file_to_delete (fid) VALUES (?)"; - my $savepoint_name = "savepoint_enqueue_fids_to_delete"; - $self->dbh->begin_work; foreach my $fidid (@fidids) { - $self->dbh->do('SAVEPOINT '.$savepoint_name); + $self->dbh->begin_work; $self->condthrow; eval { $self->dbh->do($sql, undef, $fidid); }; if ($@ || $self->dbh->err) { if ($self->was_duplicate_error) { - $self->dbh->do('ROLLBACK TO '.$savepoint_name); + # Do nothing + } else { + $self->condthrow; } - $self->condthrow; } + $self->dbh->commit; + } + +} + +sub enqueue_fids_to_delete2 { + # My kingdom for a real REPLACE implementation! + my ($self, @fidids) = @_; + my $tbl = 'file_to_delete2'; + my $sql1 = sprintf "INSERT INTO %s (fid, nexttry) VALUES (?,%s)", $tbl, $self->unix_timestamp; + my @dup_fids; + + foreach my $fidid (@fidids) { + $self->dbh->begin_work; + $self->condthrow; + eval { + $self->dbh->do($sql1, undef, $fidid); + }; + if ($@ || $self->dbh->err) { + if ($self->was_duplicate_error) { + push @dup_fids, $fidid; + } else { + $self->condthrow; + } + } + $self->dbh->commit; + } + + my $sql2 = sprintf 'UPDATE %s SET nexttry = %s WHERE fid IN (?)', $tbl, $self->unix_timestamp; + + foreach my $fidid (@dup_fids) { + $self->dbh->begin_work; + $self->condthrow; + eval { + $self->dbh->do($sql2, undef, $fidid); + }; + if ($@ || $self->dbh->err) { + if ($self->was_duplicate_error) { + # Ignore, no need of it + } else { + $self->condthrow; + } + } + $self->dbh->commit; } - $self->dbh->commit; } # -------------------------------------------------------------------------- @@ -383,12 +452,13 @@ sub column_type { } sub column_constraint { - my ($self, $table, $col) = @_; - my $sth = $self->dbh->prepare("SELECT column_name,information_schema.check_constraints.check_clause FROM information_schema.constraint_column_usage JOIN information_schema.check_constraints USING(constraint_catalog,constraint_schema,constraint_name) WHERE table_name=? 
AND column_name=?"); + my ($self, $table, $col, $cn) = @_; + my $sth = $self->dbh->prepare("SELECT column_name,information_schema.check_constraints.check_clause,constraint_name FROM information_schema.constraint_column_usage JOIN information_schema.check_constraints USING(constraint_catalog,constraint_schema,constraint_name) WHERE table_name=? AND column_name=?"); $sth->execute($table,$col); while (my $rec = $sth->fetchrow_hashref) { if ($rec->{column_name} eq $col) { $sth->finish; + $$cn = $rec->{constraint_name} if $cn; return $rec->{check_clause}; } } @@ -427,26 +497,49 @@ sub fid_type { # -------------------------------------------------------------------------- sub new_temp { - my $dbname = "tmp_mogiletest"; - _drop_db($dbname); - - system("$FindBin::Bin/../mogdbsetup", "--yes", "--dbname=$dbname", "--type=Postgres", "--dbrootuser=postgres") - and die "Failed to run mogdbsetup ($FindBin::Bin/../mogdbsetup)."; - - return MogileFS::Store->new_from_dsn_user_pass("dbi:Pg:dbname=$dbname", - "mogile", - ""); + my $self = shift; + my %args = @_; + my $dbname = $args{dbname} || "tmp_mogiletest"; + my $host = $args{dbhost} || 'localhost'; + my $port = $args{dbport} || 5432; + my $user = $args{dbuser} || 'mogile'; + my $pass = $args{dbpass} || ''; + my $rootuser = $args{dbrootuser} || $args{dbuser} || 'postgres'; + my $rootpass = $args{dbrootpass} || $args{dbpass} || ''; + _drop_db($dbname,$host,$port,$rootuser,$rootpass); + + my @args = ( "$FindBin::Bin/../mogdbsetup", "--yes", + "--dbname=$dbname", "--type=Postgres", + "--dbhost=$host", "--dbport=$port", + "--dbuser=$user", + "--dbrootuser=$rootuser", ); + push @args, "--dbpass=$pass" unless $pass eq ''; + push @args, "--dbrootpass=$rootpass" unless $rootpass eq ''; + system(@args) + and die "Failed to run mogdbsetup (".join(' ',map { "'".$_."'" } @args).")."; + + return MogileFS::Store->new_from_dsn_user_pass("dbi:Pg:dbname=$dbname;host=$host;port=$port", + $user, + $pass); } my $rootdbh; sub _root_dbh { - return 
$rootdbh ||= DBI->connect("DBI:Pg:dbname=postgres", "postgres", "", { RaiseError => 1 }) - or die "Couldn't connect to local PostgreSQL database as postgres"; + my $host = shift; + my $port = shift; + my $rootuser = shift; + my $rootpass = shift; + return $rootdbh ||= DBI->connect("DBI:Pg:dbname=postgres;host=$host;port=$port", $rootuser, $rootpass, { RaiseError => 1 }) + or die "Couldn't connect to local PostgreSQL database as $rootuser"; } sub _drop_db { my $dbname = shift; - my $root_dbh = _root_dbh(); + my $host = shift; + my $port = shift; + my $rootuser = shift; + my $rootpass = shift; + my $root_dbh = _root_dbh($host, $port, $rootuser, $rootpass); eval { $root_dbh->do("DROP DATABASE $dbname"); }; @@ -457,33 +550,6 @@ sub _drop_db { # Data-access things we override # -------------------------------------------------------------------------- -# return new classid on success (non-zero integer), die on failure -# throw 'dup' on duplicate name -# TODO: add locks around entire table -sub create_class { - my ($self, $dmid, $classname) = @_; - my $dbh = $self->dbh; - - # get the max class id in this domain - my $maxid = $dbh->selectrow_array - ('SELECT MAX(classid) FROM class WHERE dmid = ?', undef, $dmid) || 0; - - # now insert the new class - my $rv = eval { - $dbh->do("INSERT INTO class (dmid, classid, classname, mindevcount) VALUES (?, ?, ?, ?)", - undef, $dmid, $maxid + 1, $classname, 2); - }; - if ($@ || $dbh->err) { - # first is error code for duplicates - if ($self->was_duplicate_error) { - throw("dup"); - } - } - return $maxid + 1 if $rv; - $self->condthrow; - die; -} - # returns 1 on success, 0 on duplicate key error, dies on exception # TODO: need a test to hit the duplicate name error condition sub rename_file { @@ -537,19 +603,6 @@ sub update_devcount_atomic { return $rv; } -sub should_begin_replicating_fidid { - my ($self, $fidid) = @_; - my $lockname = "mgfs:fid:$fidid:replicate"; - return 1 if $self->get_lock($lockname, 1); - return 0; -} - -sub 
note_done_replicating { - my ($self, $fidid) = @_; - my $lockname = "mgfs:fid:$fidid:replicate"; - $self->release_lock($lockname); -} - # enqueue a fidid for replication, from a specific deviceid (can be undef), in a given number of seconds. sub enqueue_for_replication { my ($self, $fidid, $from_devid, $in) = @_; @@ -654,41 +707,14 @@ sub create_device { return 1; } -sub mark_fidid_unreachable { - my ($self, $fidid) = @_; - my $dbh = $self->dbh; - - eval { - $self->insert_or_update( - insert => "INSERT INTO unreachable_fids (fid, lastupdate) VALUES (?, ".$self->unix_timestamp.")", - insert_vals => [ $fidid ], - update => "UPDATE unreachable_fids SET lastupdate = ".$self->unix_timestamp." WHERE field = ?", - update_vals => [ $fidid ], - ); - }; -} - -sub delete_fidid { - my ($self, $fidid) = @_; - $self->dbh->do("DELETE FROM file WHERE fid=?", undef, $fidid); - $self->condthrow; - $self->dbh->do("DELETE FROM tempfile WHERE fid=?", undef, $fidid); - $self->condthrow; - $self->insert_or_ignore( - insert => "INSERT INTO file_to_delete (fid) VALUES (?)", - insert_vals => [ $fidid ], - ); - $self->condthrow; -} - sub replace_into_file { my $self = shift; - my %arg = $self->_valid_params([qw(fidid dmid key length classid)], @_); + my %arg = $self->_valid_params([qw(fidid dmid key length classid devcount)], @_); $self->insert_or_update( - insert => "INSERT INTO file (fid, dmid, dkey, length, classid, devcount) VALUES (?, ?, ?, ?, ?, 0)", - insert_vals => [ @arg{'fidid', 'dmid', 'key', 'length', 'classid'} ], - update => "UPDATE file SET dmid=?, dkey=?, length=?, classid=?, devcount=0 WHERE fid=?", - update_vals => [ @arg{'dmid', 'key', 'length', 'classid', 'fidid'} ], + insert => "INSERT INTO file (fid, dmid, dkey, length, classid, devcount) VALUES (?, ?, ?, ?, ?, ?)", + insert_vals => [ @arg{'fidid', 'dmid', 'key', 'length', 'classid', 'devcount'} ], + update => "UPDATE file SET dmid=?, dkey=?, length=?, classid=?, devcount=? 
WHERE fid=?", + update_vals => [ @arg{'dmid', 'key', 'length', 'classid', 'devcount', 'fidid'} ], ); $self->condthrow; } @@ -722,27 +748,30 @@ sub lockid { # returns 1 on success and 0 on timeout sub get_lock { my ($self, $lockname, $timeout) = @_; + my $hostid = lockid(hostname); my $lockid = lockid($lockname); - die "Lock recursion detected (grabbing $lockname ($lockid), had $self->{last_lock} (".lockid($self->{last_lock})."). Bailing out." if $self->{lock_depth}; + die sprintf("Lock recursion detected (grabbing %s on %s (%s/%s), had %s (%s). Bailing out.", $lockname, hostname, $hostid, $lockid, $self->{last_lock}, lockid($self->{last_lock})) if $self->{lock_depth}; debug("$$ Locking $lockname ($lockid)\n") if $Mgd::DEBUG >= 5; my $lock = undef; - while($timeout > 0 and not defined($lock)) { - $lock = eval { $self->dbh->do('INSERT INTO lock (lockid,hostname,pid,acquiredat) VALUES (?, ?, ?, '.$self->unix_timestamp().')', undef, $lockid, hostname, $$) }; - if($self->was_duplicate_error) { - $timeout--; - sleep 1; - next; - } + while($timeout >= 0) { + $lock = $self->dbh->selectrow_array("SELECT pg_try_advisory_lock(?, ?)", undef, $hostid, $lockid); $self->condthrow; - #$lock = $self->dbh->selectrow_array("SELECT pg_try_advisory_lock(?, ?)", undef, $lockid, $timeout); - #warn("$$ Lock result=$lock\n"); - if (defined $lock and $lock == 1) { - $self->{lock_depth} = 1; - $self->{last_lock} = $lockname; + if (defined $lock) { + if($lock == 1) { + $self->{lock_depth} = 1; + $self->{last_lock} = $lockname; + last; + } elsif($lock == 0) { + sleep 1 if $timeout > 0; + $timeout--; + next; + } else { + die "Something went horribly wrong while getting lock $lockname - unknown return value"; + } } else { - die "Something went horribly wrong while getting lock $lockname"; + die "Something went horribly wrong while getting lock $lockname - undefined lock"; } } return $lock; @@ -752,16 +781,50 @@ sub get_lock { # returns 1 on success and 0 if no lock we have has that name. 
sub release_lock { my ($self, $lockname) = @_; + my $hostid = lockid(hostname); my $lockid = lockid($lockname); debug("$$ Unlocking $lockname ($lockid)\n") if $Mgd::DEBUG >= 5; - #my $rv = $self->dbh->selectrow_array("SELECT pg_advisory_unlock(?)", undef, $lockid); - my $rv = $self->dbh->do('DELETE FROM lock WHERE lockid=? AND pid=? AND hostname=?', undef, $lockid, $$, hostname); + my $rv = $self->dbh->selectrow_array("SELECT pg_advisory_unlock(?, ?)", undef, $hostid, $lockid); debug("Double-release of lock $lockname!") if $self->{lock_depth} != 0 and $rv == 0 and $Mgd::DEBUG >= 2; $self->condthrow; $self->{lock_depth} = 0; return $rv; } +sub BLOB_BIND_TYPE { { pg_type => PG_BYTEA } } + +sub set_checksum { + my ($self, $fidid, $hashtype, $checksum) = @_; + my $dbh = $self->dbh; + + $dbh->begin_work; + eval { + my $sth = $dbh->prepare("INSERT INTO checksum " . + "(fid, hashtype, checksum) ". + "VALUES (?, ?, ?)"); + $sth->bind_param(1, $fidid); + $sth->bind_param(2, $hashtype); + $sth->bind_param(3, $checksum, BLOB_BIND_TYPE); + $sth->execute; + }; + if ($@ || $dbh->err) { + if ($self->was_duplicate_error) { + eval { + my $sth = $dbh->prepare("UPDATE checksum " . + "SET hashtype = ?, checksum = ? " . 
+ "WHERE fid = ?"); + $sth->bind_param(1, $hashtype); + $sth->bind_param(2, $checksum, BLOB_BIND_TYPE); + $sth->bind_param(3, $fidid); + $sth->execute; + }; + $self->condthrow; + } + } + $dbh->commit; + $self->condthrow; +} + 1; __END__ diff --git a/lib/MogileFS/Store/SQLite.pm b/lib/MogileFS/Store/SQLite.pm index 259734e8..70f1425c 100644 --- a/lib/MogileFS/Store/SQLite.pm +++ b/lib/MogileFS/Store/SQLite.pm @@ -1,7 +1,8 @@ package MogileFS::Store::SQLite; use strict; use warnings; -use DBI; +use DBI qw(:sql_types); +use Digest::MD5 qw(md5); # Used for lockid use DBD::SQLite 1.13; use MogileFS::Util qw(throw); use base 'MogileFS::Store'; @@ -14,6 +15,7 @@ use File::Temp (); sub post_dbi_connect { my $self = shift; $self->{dbh}->func(5000, 'busy_timeout'); + $self->{lock_depth} = 0; } sub want_raise_errors { 1 } @@ -30,8 +32,15 @@ sub dsn_of_root { sub can_replace { 1 } sub can_insertignore { 0 } +sub can_for_update { 0 } sub unix_timestamp { "strftime('%s','now')" } +sub init { + my $self = shift; + $self->SUPER::init; + $self->{lock_depth} = 0; +} + # DBD::SQLite doesn't really have any table meta info methods # And PRAGMA table_info() does not return "real" rows sub column_type { @@ -47,17 +56,115 @@ sub column_type { return undef; } -# Implement these for native database locking -# sub get_lock {} -# sub release_lock {} +sub lockid { + my ($lockname) = @_; + croak("Called with empty lockname! 
$lockname") unless (defined $lockname && length($lockname) > 0); + my $num = unpack 'N',md5($lockname); + return ($num & 0x7fffffff); +} + +# returns 1 if the lock holder is still alive, 0 if lock holder died +sub lock_holder_alive { + my ($self, $lockid, $lockname) = @_; + my $max_age = 3600; + my $force_unlock; + + my $dbh = $self->dbh; + my ($hostname, $pid, $acquiredat) = $dbh->selectrow_array('SELECT hostname,pid,acquiredat FROM lock WHERE lockid = ?', undef, $lockid); + + # maybe the lock was _just_ released + return 0 unless defined $pid; + + # if the lock is too old, don't check anything else + if (($acquiredat + $max_age) < time) { + $force_unlock = 1; + } elsif ($hostname eq MogileFS::Config->hostname) { + # maybe we were unlucky and the PID got recycled + if ($pid == $$) { + die("Possible lock recursion inside DB but not process (grabbing $lockname ($lockid, acquiredat=$acquiredat)"); + } + + # don't force the lock if the process is still alive + return 1 if kill(0, $pid); + + $force_unlock = 1; + } + + return 0 unless $force_unlock; + + # lock holder is dead or the lock is too old: kill the lock + my $rv = $self->retry_on_deadlock(sub { + $dbh->do('DELETE FROM lock WHERE lockid = ? AND pid = ? AND hostname = ?', undef, $lockid, $pid, $hostname); + }); + + # delete can fail if another process just deleted and regrabbed this lock + return $rv ? 0 : 1; +} + +# attempt to grab a lock of lockname, and timeout after timeout seconds. +# the lock should be unique in the space of (lockid). We can also detect +# if pid is dead as SQLite only runs on one host. +# returns 1 on success and 0 on timeout +sub get_lock { + my ($self, $lockname, $timeout) = @_; + my $lockid = lockid($lockname); + die "Lock recursion detected (grabbing $lockname ($lockid), had $self->{last_lock} (".lockid($self->{last_lock})."). Bailing out." 
if $self->{lock_depth}; + + debug("$$ Locking $lockname ($lockid)\n") if $Mgd::DEBUG >= 5; + my $dbh = $self->dbh; + my $lock = undef; + my $try = sub { + $dbh->do('INSERT INTO lock (lockid,hostname,pid,acquiredat) VALUES (?, ?, ?, '.$self->unix_timestamp().')', undef, $lockid, MogileFS::Config->hostname, $$); + }; + + while ($timeout >= 0 and not defined($lock)) { + $lock = eval { $self->retry_on_deadlock($try) }; + if ($self->was_duplicate_error) { + # retry immediately if the lock holder died + if ($self->lock_holder_alive($lockid, $lockname)) { + sleep 1 if $timeout > 0; + $timeout--; + } + next; + } + $self->condthrow; + if (defined $lock and $lock == 1) { + $self->{lock_depth} = 1; + $self->{last_lock} = $lockname; + } else { + die "Something went horribly wrong while getting lock $lockname"; + } + } + return $lock; +} + +# attempt to release a lock of lockname. +# returns 1 on success and 0 if no lock we have has that name. +sub release_lock { + my ($self, $lockname) = @_; + my $lockid = lockid($lockname); + debug("$$ Unlocking $lockname ($lockid)\n") if $Mgd::DEBUG >= 5; + my $rv = $self->retry_on_deadlock(sub { + $self->dbh->do('DELETE FROM lock WHERE lockid=? AND pid=? AND hostname=?', undef, $lockid, $$, MogileFS::Config->hostname); + }); + debug("Double-release of lock $lockname!") if $self->{lock_depth} != 0 and $rv == 0 and $Mgd::DEBUG >= 2; + $self->condthrow; + $self->{lock_depth} = 0; + return $rv; +} # -------------------------------------------------------------------------- # Store-related things we override # -------------------------------------------------------------------------- -# FIXME: Figure out how to properly detect this. 
+# from sqlite3.h: +use constant SQLITE_BUSY => 5; # The database file is locked +use constant SQLITE_LOCKED => 6; # A table in the database is locked + sub was_deadlock_error { - return 0; + my $err = $_[0]->dbh->err or return 0; + + ($err == SQLITE_BUSY || $err == SQLITE_LOCKED); } sub was_duplicate_error { @@ -67,6 +174,7 @@ sub was_duplicate_error { my $errstr = $dbh->errstr; return 1 if $errstr =~ /(?:is|are) not unique/i; return 1 if $errstr =~ /must be unique/i; + return 1 if $errstr =~ /UNIQUE constraint failed/i; return 0; } @@ -76,6 +184,7 @@ sub was_duplicate_error { sub new_temp { my ($fh, $filename) = File::Temp::tempfile(); + close($fh); system("$FindBin::Bin/../mogdbsetup", "--type=SQLite", "--yes", "--dbname=$filename") and die "Failed to run mogdbsetup ($FindBin::Bin/../mogdbsetup)."; @@ -94,6 +203,15 @@ sub table_exists { }; } +sub setup_database { + my $self = shift; + # old installations may not have this, add this without changing + # schema version globally (unless the table itself changes) + $self->add_extra_tables('lock'); + $self->create_table('lock'); + return $self->SUPER::setup_database; +} + # -------------------------------------------------------------------------- # Schema # -------------------------------------------------------------------------- @@ -104,6 +222,7 @@ sub TABLE_class { classid TINYINT UNSIGNED NOT NULL, classname VARCHAR(50), mindevcount TINYINT UNSIGNED NOT NULL, + hashtype TINYINT UNSIGNED, UNIQUE (dmid,classid), UNIQUE (dmid,classname) )" @@ -129,8 +248,8 @@ sub TABLE_device { status ENUM('alive','dead','down','readonly','drain'), weight MEDIUMINT DEFAULT 100, - mb_total MEDIUMINT UNSIGNED, - mb_used MEDIUMINT UNSIGNED, + mb_total INT UNSIGNED, + mb_used INT UNSIGNED, mb_asof INT UNSIGNED, PRIMARY KEY (devid), INDEX (status) @@ -205,6 +324,16 @@ sub INDEXES_file_to_delete2 { ("CREATE INDEX file_to_delete2_nexttry ON file_to_delete2 (nexttry)"); } +# Extra table +sub TABLE_lock { + "CREATE TABLE lock ( + lockid INT 
UNSIGNED NOT NULL PRIMARY KEY, + hostname VARCHAR(255) NOT NULL, + pid INT UNSIGNED NOT NULL, + acquiredat INT UNSIGNED NOT NULL + )" +} + sub filter_create_sql { my ($self, $sql) = @_; $sql =~ s/\bENUM\(.+?\)/TEXT/g; @@ -234,18 +363,19 @@ sub upgrade_add_device_drain { } sub upgrade_modify_server_settings_value { 1 } sub upgrade_add_file_to_queue_arg { 1 } +sub upgrade_modify_device_size { 1 } +sub upgrade_add_host_readonly { 1 } -# inefficient, but no warning and no locking -sub should_begin_replicating_fidid { - my ($self, $fidid) = @_; - return 1; -} +sub BLOB_BIND_TYPE { SQL_BLOB } -# no locking -sub note_done_replicating { - my ($self, $fidid) = @_; -} +sub get_keys_like_operator { + my $self = shift; + my $bool = MogileFS::Config->server_setting_cached('case_sensitive_list_keys'); + # this is a dbh-wide change, but this is the only place we use LIKE + $self->dbh->do("PRAGMA case_sensitive_like = " . ($bool ? "ON" : "OFF")); + return "LIKE"; +} 1; diff --git a/lib/MogileFS/Sys.pm b/lib/MogileFS/Sys.pm deleted file mode 100644 index cf47e336..00000000 --- a/lib/MogileFS/Sys.pm +++ /dev/null @@ -1,13 +0,0 @@ -package MogileFS::Sys; -use strict; -use Socket qw(MSG_NOSIGNAL); -use vars qw($FLAG_NOSIGNAL); - -# used in send() calls to request not to get SIGPIPEd -eval { $FLAG_NOSIGNAL = MSG_NOSIGNAL }; - -sub flag_nosignal { - return $FLAG_NOSIGNAL; -} - -1; diff --git a/lib/MogileFS/Test.pm b/lib/MogileFS/Test.pm index 561b6d87..28adf074 100644 --- a/lib/MogileFS/Test.pm +++ b/lib/MogileFS/Test.pm @@ -7,9 +7,10 @@ use DBI; use FindBin qw($Bin); use IO::Socket::INET; use MogileFS::Server; +use LWP::UserAgent; use base 'Exporter'; -our @EXPORT = qw(&find_mogclient_or_skip &temp_store &create_mogstored &create_temp_tracker); +our @EXPORT = qw(&find_mogclient_or_skip &temp_store &create_mogstored &create_temp_tracker &try_for &want); sub find_mogclient_or_skip { @@ -33,7 +34,7 @@ sub find_mogclient_or_skip { } unless (eval { TrackerHandle::_mogadm_exe() }) { - warn 
"Can't find mogadm utility.\n"; + warn "Can't find mogadm utility $@\n"; Test::More::plan('skip_all' => "Can't find mogadm executable, necessary for testing."); } @@ -47,6 +48,8 @@ sub temp_store { my $user = $ENV{MOGTEST_DBUSER} || ''; my $pass = $ENV{MOGTEST_DBPASS} || ''; my $name = $ENV{MOGTEST_DBNAME} || ''; + my $rootuser = $ENV{MOGTEST_DBROOTUSER} || ''; + my $rootpass = $ENV{MOGTEST_DBROOTPASS} || ''; # default to mysql, but make sure DBD::MySQL is installed unless ($type) { @@ -61,8 +64,12 @@ sub temp_store { if ($@) { die "Failed to load $store: $@\n"; } - my $sto = $store->new_temp(dbhost => $host, dbport => $port, - dbuser => $user, dbpass => $pass, dbname => $name); + my %opts = ( dbhost => $host, dbport => $port, + dbuser => $user, dbpass => $pass, + dbname => $name); + $opts{dbrootuser} = $rootuser unless $rootuser eq ''; + $opts{dbrootpass} = $rootpass unless $rootpass eq ''; + my $sto = $store->new_temp(%opts); Mgd::set_store($sto); return $sto; } @@ -117,6 +124,7 @@ sub create_mogstored { die "Failed: tracker already running on port 7500?\n" if $conn; $ENV{PERL5LIB} .= ":$Bin/../lib"; my @args = ("$Bin/../mogstored", + "--skipconfig", "--httplisten=$ip:7500", "--mgmtlisten=$ip:7501", "--maxconns=1000", # because we're not root, put it below 1024 @@ -145,6 +153,43 @@ sub create_mogstored { return undef; } +sub try_for { + my ($tries, $code) = @_; + for (1..$tries) { + return 1 if $code->(); + sleep 1; + } + return 0; +} + +sub want { + my ($admin, $count, $jobclass) = @_; + my $req = "!want $count $jobclass\r\n"; + + syswrite($admin, $req) or die "syswrite: $!\n"; + + my $r = <$admin>; + if ($r =~ /Now desiring $count children doing '$jobclass'/ && <$admin> eq ".\r\n") { + my $rcount; + try_for(30, sub { + $rcount = -1; + syswrite($admin, "!jobs\r\n"); + MogileFS::Util::wait_for_readability(fileno($admin), 10); + while (1) { + my $line = <$admin>; + if ($line =~ /\A$jobclass count (\d+)/) { + $rcount = $1; + } + last if $line eq ".\r\n"; + } + 
$rcount == $count; + }); + return 1 if $rcount == $count; + die "got $jobclass count $rcount (expected=$count)\n"; + } + die "got bad response for $req: $r\n"; +} + ############################################################################ package ProcessHandle; sub new { @@ -174,13 +219,15 @@ sub ipport { my $_mogadm_exe_cache; sub _mogadm_exe { return $_mogadm_exe_cache if $_mogadm_exe_cache; - foreach my $exe ("$FindBin::Bin/../../utils/mogadm", - "$FindBin::Bin/../../../utils/mogadm", - "/usr/bin/mogadm", - "/usr/sbin/mogadm", - "/usr/local/bin/mogadm", - "/usr/local/sbin/mogadm", + for my $dir ("$FindBin::Bin/../../utils", + "$FindBin::Bin/../../../utils", + split(/:/, $ENV{PATH}), + "/usr/bin", + "/usr/sbin", + "/usr/local/bin", + "/usr/local/sbin", ) { + my $exe = $dir . '/mogadm'; return $_mogadm_exe_cache = $exe if -x $exe; } die "mogadm executable not found.\n"; diff --git a/lib/MogileFS/Util.pm b/lib/MogileFS/Util.pm index 5282eef5..b9cec58d 100644 --- a/lib/MogileFS/Util.pm +++ b/lib/MogileFS/Util.pm @@ -11,8 +11,54 @@ our @EXPORT_OK = qw( error undeferr debug fatal daemonize weighted_list every wait_for_readability wait_for_writeability throw error_code max min first okay_args device_state eurl decode_url_args + encode_url_args apply_state_events apply_state_events_list ); +# Applies monitor-job-supplied state events against the factory singletons. +# Sad this couldn't be an object method, but ProcManager doesn't base off +# anything common. +sub apply_state_events { + my @events = split(/\s/, ${$_[0]}); + shift @events; # pop the :monitor_events part + apply_state_events_list(@events); +} + +sub apply_state_events_list { + # This will needlessly fetch domain/class/host most of the time. + # Maybe replace with something that "caches" factories? 
+ my %factories = ( 'domain' => MogileFS::Factory::Domain->get_factory, + 'class' => MogileFS::Factory::Class->get_factory, + 'host' => MogileFS::Factory::Host->get_factory, + 'device' => MogileFS::Factory::Device->get_factory, ); + + for my $ev (@_) { + my $args = decode_url_args($ev); + my $mode = delete $args->{ev_mode}; + my $type = delete $args->{ev_type}; + my $id = delete $args->{ev_id}; + + # This special case feels gross, but that's what it is. + if ($type eq 'srvset') { + my $val = $mode eq 'set' ? $args->{value} : undef; + MogileFS::Config->cache_server_setting($id, $val); + next; + } + + my $old = $factories{$type}->get_by_id($id); + if ($mode eq 'setstate') { + # Host/Device only. + # FIXME: Make objects slightly mutable and directly set fields? + $factories{$type}->set({ %{$old->fields}, %$args }); + } elsif ($mode eq 'set') { + # Re-add any observed data. + my $observed = $old ? $old->observed_fields : {}; + $factories{$type}->set({ %$args, %$observed }); + } elsif ($mode eq 'remove') { + $factories{$type}->remove($old) if $old; + } + } +} + sub every { my ($delay, $code) = @_; my ($worker, $psock_fd); @@ -34,7 +80,6 @@ sub every { my $now = Time::HiRes::time(); my $took = $now - $start; my $sleep_for = defined $explicit_sleep ? $explicit_sleep : ($delay - $took); - next unless $sleep_for > 0; # simple case, not in a child process (this never happens currently) unless ($psock_fd) { @@ -42,25 +87,18 @@ sub every { next; } - while ($sleep_for > 0) { - my $last_time_pre_sleep = $now; - $worker->forget_woken_up; - if (wait_for_readability($psock_fd, $sleep_for)) { - # TODO: uncomment this and watch an idle server and how many wakeups. could optimize. 
- #local $Mgd::POST_SLEEP_DEBUG = 1; - #warn "WOKEN UP FROM SLEEP in $worker [$$]\n"; - $worker->read_from_parent; - next CODERUN if $worker->was_woken_up; - } - $now = Time::HiRes::time(); - $sleep_for -= ($now - $last_time_pre_sleep); - } + Time::HiRes::sleep($sleep_for) if $sleep_for > 0; + #local $Mgd::POST_SLEEP_DEBUG = 1; + # This calls read_from_parent. Workers used to needlessly call + # parent_ping constantly. + $worker->parent_ping; } } sub debug { my ($msg, $level) = @_; return unless $Mgd::DEBUG >= 1; + $msg =~ s/[\r\n]+//g; if (my $worker = MogileFS::ProcManager->is_child) { $worker->send_to_parent("debug $msg"); } else { @@ -214,16 +252,6 @@ sub wait_for_writeability { return $nfound ? 1 : 0; } -# if given an HTTP URL, break it down into [ host, port, URI ], else -# returns die, because we don't support non-http-mode anymore -sub url_parts { - my $path = shift; - if ($path =~ m!^http://(.+?)(?::(\d+))?(/.+)$!) { - return [ $1, $2 || 80, $3 ]; - } - Carp::croak("Bogus URL: $path"); -} - sub max { my ($n1, $n2) = @_; return $n1 if $n1 > $n2; @@ -264,6 +292,11 @@ sub eurl { return $a; } +sub encode_url_args { + my $args = shift; + return join('&', map { eurl($_) . "=" . eurl($args->{$_}) } keys %$args); +} + sub decode_url_args { my $a = shift; my $buffer = ref $a ? 
$a : \$a; diff --git a/lib/MogileFS/Worker.pm b/lib/MogileFS/Worker.pm index 3b849b0e..f3a74dff 100644 --- a/lib/MogileFS/Worker.pm +++ b/lib/MogileFS/Worker.pm @@ -11,7 +11,7 @@ use fields ('psock', # socket for parent/child communications 'queue_todo', # aref of hrefs of work sent from parent ); -use MogileFS::Util qw(error eurl decode_url_args); +use MogileFS::Util qw(error eurl decode_url_args apply_state_events); use MogileFS::Server; use vars ( @@ -26,7 +26,7 @@ sub new { $self->{psock} = $psock; $self->{readbuf} = ''; $self->{last_bcast_state} = {}; - $self->{monitor_has_run} = 0; + $self->{monitor_has_run} = MogileFS::ProcManager->is_monitor_good; $self->{last_ping} = 0; $self->{last_wake} = {}; $self->{queue_depth} = {}; @@ -41,12 +41,13 @@ sub psock_fd { return fileno($self->{psock}); } -sub validate_dbh { - return Mgd::validate_dbh(); +sub psock { + my $self = shift; + return $self->{psock}; } -sub get_dbh { - return Mgd::get_dbh(); +sub validate_dbh { + return Mgd::validate_dbh(); } sub monitor_has_run { @@ -62,9 +63,8 @@ sub forget_that_monitor_has_run { sub wait_for_monitor { my $self = shift; while (! $self->monitor_has_run) { - $self->read_from_parent; + $self->read_from_parent(1); $self->still_alive; - sleep 1; } } @@ -74,7 +74,7 @@ sub wait_for_monitor { sub still_alive { my $self = shift; my $now = time(); - if ($now > $self->{last_ping}) { + if ($now > $self->{last_ping} + ($self->watchdog_timeout / 4)) { $self->send_to_parent(":still_alive"); # a no-op, just for the watchdog $self->{last_ping} = $now; } @@ -134,8 +134,9 @@ sub read_from_parent { # while things are immediately available, # (or optionally sleep a bit) while (MogileFS::Util::wait_for_readability(fileno($psock), $timeout)) { + $timeout = 0; # only wait on the timeout for the first read. my $buf; - my $rv = sysread($psock, $buf, 1024); + my $rv = sysread($psock, $buf, Mgd::UNIX_RCVBUF_SIZE()); if (!$rv) { if (defined $rv) { die "While reading pipe from parent, got EOF. 
Parent's gone. Quitting.\n"; @@ -187,45 +188,6 @@ sub parent_ping { } } -sub broadcast_device_writeable { - $_[0]->_broadcast_state("device", $_[1], "writeable"); -} -sub broadcast_device_readable { - $_[0]->_broadcast_state("device", $_[1], "readable"); -} -sub broadcast_device_unreachable { - $_[0]->_broadcast_state("device", $_[1], "unreachable"); -} -sub broadcast_host_reachable { - $_[0]->_broadcast_state("host", $_[1], "reachable"); -} -sub broadcast_host_unreachable { - $_[0]->_broadcast_state("host", $_[1], "unreachable"); -} - -sub _broadcast_state { - my ($self, $what, $whatid, $state) = @_; - if ($what eq "host") { - MogileFS::Host->of_hostid($whatid)->set_observed_state($state); - } elsif ($what eq "device") { - MogileFS::Device->of_devid($whatid)->set_observed_state($state); - } - my $key = "$what-$whatid"; - my $laststate = $self->{last_bcast_state}{$key}; - my $now = time(); - # broadcast on initial discovery, state change, and every 10 seconds - if (!$laststate || $laststate->[1] ne $state || $laststate->[0] < $now - 10) { - $self->send_to_parent(":state_change $what $whatid $state"); - $self->{last_bcast_state}{$key} = [$now, $state]; - } -} - -sub invalidate_meta { - my ($self, $what) = @_; - return if $Mgd::INVALIDATE_NO_PROPOGATE; # anti recursion - $self->send_to_parent(":invalidate_meta $what"); -} - # tries to parse generic (not job-specific) commands sent from parent # to child. returns 1 on success, or 0 if command given isn't generic, # and child should parse. 
@@ -234,16 +196,6 @@ sub process_generic_command { my ($self, $lineref) = @_; return 0 unless $$lineref =~ /^:/; # all generic commands start with colon - if ($$lineref =~ /^:state_change (\w+) (\d+) (\w+)/) { - my ($what, $whatid, $state) = ($1, $2, $3); - if ($what eq "host") { - MogileFS::Host->of_hostid($whatid)->set_observed_state($state); - } elsif ($what eq "device") { - MogileFS::Device->of_devid($whatid)->set_observed_state($state); - } - return 1; - } - if ($$lineref =~ /^:shutdown/) { $$got_live_vs_die = 1 if $got_live_vs_die; exit 0; @@ -254,11 +206,8 @@ sub process_generic_command { return 1; } - if ($$lineref =~ /^:invalidate_meta_once (\w+)/) { - local $Mgd::INVALIDATE_NO_PROPOGATE = 1; - # where $1 is one of {"domain", "device", "host", "class"} - my $class = "MogileFS::" . ucfirst(lc($1)); - $class->invalidate_cache; + if ($$lineref =~ /^:monitor_events/) { + apply_state_events($lineref); return 1; } @@ -278,18 +227,6 @@ sub process_generic_command { return 1; } - # :set_dev_utilization dev# 45.2 dev# 45.2 dev# 45.2 dev# 45.2 dev 45.2\n - # (dev#, utilz%)+ - if (my ($devid, $util) = $$lineref =~ /^:set_dev_utilization (.+)/) { - my %pairs = split(/\s+/, $1); - local $MogileFS::Device::util_no_broadcast = 1; - while (my ($devid, $util) = each %pairs) { - my $dev = eval { MogileFS::Device->of_devid($devid) } or next; - $dev->set_observed_utilization($util); - } - return 1; - } - # queue_name depth if ($$lineref =~ /^:queue_depth (\w+) (\d+)/) { $self->queue_depth($1, $2); diff --git a/lib/MogileFS/Worker/Delete.pm b/lib/MogileFS/Worker/Delete.pm index da7ebe5d..ff4a421f 100644 --- a/lib/MogileFS/Worker/Delete.pm +++ b/lib/MogileFS/Worker/Delete.pm @@ -4,14 +4,13 @@ package MogileFS::Worker::Delete; use strict; use base 'MogileFS::Worker'; use MogileFS::Util qw(error); +use MogileFS::Server; # we select 1000 but only do a random 100 of them, to allow # for stateless parallelism use constant LIMIT => 1000; use constant PER_BATCH => 100; -# TODO: use 
LWP and persistent connections to do deletes. less local ports used. - sub new { my ($class, $psock) = @_; my $self = fields::new($class); @@ -31,18 +30,21 @@ sub work { my $old_queue_check = 0; # next time to check the old queue. my $old_queue_backoff = 0; # backoff index - # wait for one pass of the monitor - $self->wait_for_monitor; - while (1) { $self->send_to_parent("worker_bored 50 delete"); $self->read_from_parent(1); - $self->validate_dbh; + next unless $self->validate_dbh; # call our workers, and have them do things # RETVAL = 0; I think I am done working for now # RETVAL = 1; I have more work to do - my $tempres = $self->process_tempfiles; + my $lock = 'mgfs:tempfiles'; + # This isn't something we need to wait for: just need to ensure one is. + my $tempres; + if (Mgd::get_store()->get_lock($lock, 0)) { + $tempres = $self->process_tempfiles; + Mgd::get_store()->release_lock($lock); + } my $delres; if (time() > $old_queue_check) { $self->reenqueue_delayed_deletes; @@ -71,6 +73,29 @@ sub work { } +# deletes a given DevFID from the storage device +# returns true on success, false on failure +sub delete_devfid { + my ($self, $dfid) = @_; + + # send delete request + error("Sending delete for " . $dfid->url) if $Mgd::DEBUG >= 2; + + my $res; + $dfid->device->host->http("DELETE", $dfid->uri_path, undef, sub { ($res) = @_ }); + Danga::Socket->SetPostLoopCallback(sub { !defined $res }); + Danga::Socket->EventLoop; + + my $httpcode = $res->code; + + # effectively means all went well + return 1 if (($httpcode >= 200 && $httpcode <= 299) || $httpcode == 404); + + my $status = $res->status_line; + error("Error: unlink failure: " . $dfid->url . ": HTTP code $status"); + return 0; +} + sub process_tempfiles { my $self = shift; # also clean the tempfile table @@ -153,19 +178,28 @@ sub process_deletes2 { while (my $todo = shift @$queue_todo) { $self->still_alive; - $self->read_from_parent; # load all the devids related to this fid, and delete. 
my $fid = MogileFS::FID->new($todo->{fid}); my $fidid = $fid->id; + + # if it's currently being replicated, wait for replication to finish + # before deleting to avoid stale files + if (! $sto->should_begin_replicating_fidid($fidid)) { + $sto->reschedule_file_to_delete2_relative($fidid, 1); + next; + } + + $sto->delete_fidid_enqueued($fidid); + my @devids = $fid->devids; my %devids = map { $_ => 1 } @devids; for my $devid (@devids) { - my $dev = $devid ? MogileFS::Device->of_devid($devid) : undef; + my $dev = $devid ? Mgd::device_factory()->get_by_id($devid) : undef; error("deleting fid $fidid, on devid ".($devid || 'NULL')."...") if $Mgd::DEBUG >= 2; - unless ($dev && $dev->exists) { + unless ($dev) { next; } if ($dev->dstate->is_perm_dead) { @@ -199,53 +233,23 @@ sub process_deletes2 { next; } - my $urlparts = MogileFS::Util::url_parts($path); - - # hit up the server and delete it - # TODO: (optimization) use MogileFS->get_observed_state and don't - # try to delete things known to be down/etc - my $sock = IO::Socket::INET->new(PeerAddr => $urlparts->[0], - PeerPort => $urlparts->[1], - Timeout => 2); - # this used to mark the device as down for the whole tracker. - # if the device is actually down, we can struggle until the - # monitor job figures it out... otherwise an occasional timeout - # due to high load will prevent delete from working at all. - unless ($sock) { + if ($self->delete_devfid($dfid)) { + # effectively means all went well + $sto->remove_fidid_from_devid($fidid, $devid); + delete $devids{$devid}; + } else { + # remote file system error? connect failure? retry in 30min $sto->reschedule_file_to_delete2_relative($fidid, - 60 * 60 * (1 + $todo->{failcount})); + 60 * 30 * (1 + $todo->{failcount})); next; } - - # send delete request - error("Sending delete for $path") if $Mgd::DEBUG >= 2; - - $sock->write("DELETE $urlparts->[2] HTTP/1.0\r\n\r\n"); - my $response = <$sock>; - if ($response =~ m!^HTTP/\d+\.\d+\s+(\d+)!) 
{ - if (($1 >= 200 && $1 <= 299) || $1 == 404) { - # effectively means all went well - $sto->remove_fidid_from_devid($fidid, $devid); - delete $devids{$devid}; - } else { - # remote file system error? mark node as down - my $httpcode = $1; - error("Error: unlink failure: $path: HTTP code $httpcode"); - - $sto->reschedule_file_to_delete2_relative($fidid, - 60 * 30 * (1 + $todo->{failcount})); - next; - } - } else { - error("Error: unknown response line deleting $path: $response"); - } } # fid has no pants. unless (keys %devids) { $sto->delete_fid_from_file_to_delete2($fidid); - next; } + $sto->note_done_replicating($fidid); } # did work. @@ -270,7 +274,6 @@ sub process_deletes { last if ++$done > PER_BATCH; $self->still_alive; - $self->read_from_parent; my ($fid, $devid) = @$dm; error("deleting fid $fid, on devid ".($devid || 'NULL')."...") if $Mgd::DEBUG >= 2; @@ -312,8 +315,8 @@ sub process_deletes { # CASE: devid is marked dead or doesn't exist: consider it deleted on this devid. # (Note: we're tolerant of '0' as a devid, due to old buggy version which # would sometimes put that in there) - my $dev = $devid ? MogileFS::Device->of_devid($devid) : undef; - unless ($dev && $dev->exists) { + my $dev = $devid ? 
Mgd::device_factory()->get_by_id($devid) : undef; + unless ($dev) { $done_with_devid->("devid_doesnt_exist"); next; } @@ -345,38 +348,11 @@ sub process_deletes { next; } - my $urlparts = MogileFS::Util::url_parts($path); - - # hit up the server and delete it - # TODO: (optimization) use MogileFS->get_observed_state and don't try to delete things known to be down/etc - my $sock = IO::Socket::INET->new(PeerAddr => $urlparts->[0], - PeerPort => $urlparts->[1], - Timeout => 2); - unless ($sock) { - # timeout or something, mark this device as down for now and move on - $self->broadcast_host_unreachable($dev->hostid); - $reschedule_fid->(60 * 60 * 2, "no_sock_to_hostid"); - next; - } - - # send delete request - error("Sending delete for $path") if $Mgd::DEBUG >= 2; - - $sock->write("DELETE $urlparts->[2] HTTP/1.0\r\n\r\n"); - my $response = <$sock>; - if ($response =~ m!^HTTP/\d+\.\d+\s+(\d+)!) { - if (($1 >= 200 && $1 <= 299) || $1 == 404) { - # effectively means all went well - $done_with_devid->("deleted"); - } else { - # remote file system error? mark node as down - my $httpcode = $1; - error("Error: unlink failure: $path: HTTP code $httpcode"); - $reschedule_fid->(60 * 30, "http_code_$httpcode"); - next; - } + if ($self->delete_devfid($dfid)) { + $done_with_devid->("deleted"); } else { - error("Error: unknown response line deleting $path: $response"); + # remote file system error? connect failure? retry in 30min + $reschedule_fid->(60 * 30, "http_failure"); } } diff --git a/lib/MogileFS/Worker/Fsck.pm b/lib/MogileFS/Worker/Fsck.pm index 104dde08..50927c84 100644 --- a/lib/MogileFS/Worker/Fsck.pm +++ b/lib/MogileFS/Worker/Fsck.pm @@ -3,13 +3,12 @@ package MogileFS::Worker::Fsck; use strict; use base 'MogileFS::Worker'; use fields ( - 'last_stop_check', # unixtime 'should_stop_running' last called - 'last_maxcheck_write', # unixtime maxcheck written - 'size_checker', # subref which, given a DevFID, returns size of file 'opt_nostat', # bool: do we trust mogstoreds? 
skipping size stats? + 'opt_checksum', # (class|off|MD5) checksum mode ); use MogileFS::Util qw(every error debug); use MogileFS::Config; +use MogileFS::Server; use List::Util (); use Time::HiRes (); @@ -27,6 +26,10 @@ use constant EV_START_SEARCH => "SRCH"; use constant EV_FOUND_FID => "FOND"; use constant EV_RE_REPLICATE => "REPL"; use constant EV_BAD_COUNT => "BCNT"; +use constant EV_BAD_CHECKSUM => "BSUM"; +use constant EV_NO_CHECKSUM => "NSUM"; +use constant EV_MULTI_CHECKSUM => "MSUM"; +use constant EV_BAD_HASHTYPE => "BALG"; use POSIX (); @@ -37,79 +40,44 @@ sub watchdog_timeout { 120 } sub work { my $self = shift; - my $run_count = 0; - # this can be CPU-intensive. let's nice ourselves down. POSIX::nice(10); - # - my $running = 0; # start time - my $n_check = 0; # items checked - my $start = sub { - return if $running; - $running = $nowish = time(); - }; - my $stats = sub { - return unless $running; - my $elap = $nowish - $running; - debug(sprintf("In %d secs, %d fids, %0.02f fids/sec\n", $elap, $n_check, ($n_check / ($elap || 1)))); - }; - my $last_beat = 0; - my $beat = sub { - return unless $nowish >= $last_beat + 5; - $stats->(); - $last_beat = $nowish; - }; - my $stop = sub { - return unless $running; - $stats->(); - debug("done."); - $running = 0; - }; - # - my $sto = Mgd::get_store(); my $max_checked = 0; - every(1.0, sub { + every(2.0, sub { my $sleep_set = shift; $nowish = time(); local $Mgd::nowish = $nowish; - # checking doesn't go well if the monitor job hasn't actively started - # marking things as being available - unless ($self->monitor_has_run) { - # only warn on runs after the first. gives the monitor job some time to work - # before we throw a message. 
- debug("waiting for monitor job to complete a cycle before beginning") - if $run_count++ > 0; - return; - } - my $queue_todo = $self->queue_todo('fsck'); - unless (@{$queue_todo}) { - $self->send_to_parent('worker_bored 50 fsck'); - $self->read_from_parent(1); - } else { - $self->parent_ping; - } + # This counts the same as a $self->still_alive; + $self->send_to_parent('worker_bored 50 fsck'); + return unless @{$queue_todo}; + return unless $self->validate_dbh; my @fids = (); while (my $todo = shift @{$queue_todo}) { my $fid = MogileFS::FID->new($todo->{fid}); - unless ($fid->exists) { + if ($fid->exists) { + push(@fids, $fid); + } else { # FID stopped existing before being checked. $sto->delete_fid_from_file_to_queue($fid->id, FSCK_QUEUE); } - push(@fids, $fid); } return unless @fids; $self->{opt_nostat} = MogileFS::Config->server_setting('fsck_opt_policy_only') || 0; + my $alg = MogileFS::Config->server_setting_cached("fsck_checksum"); + if (defined($alg) && $alg eq "off") { + $self->{opt_checksum} = "off"; + } else { + $self->{opt_checksum} = MogileFS::Checksum->valid_alg($alg) ? $alg : 0; + } MogileFS::FID->mass_load_devids(@fids); - $self->init_size_checker(\@fids); - # don't sleep in loop, next round, since we found stuff to work on # this round... $sleep_set->(0); @@ -122,45 +90,13 @@ sub work { # some connectivity problem... retry this fid later. 
# (don't dequeue it) $self->still_alive; - sleep 5; next; } $sto->delete_fid_from_file_to_queue($fid->id, FSCK_QUEUE); - $n_check++; - $beat->(); } }); } -# only write to server_settings table our position every 5 seconds -sub set_max_checked_lazy { - my ($self, $nmax) = @_; - return 0 if $nowish < ($self->{last_maxcheck_write} || 0) + 5; - $self->{last_maxcheck_write} = $nowish; - $self->set_max_checked($nmax); -} - -sub set_max_checked { - my ($self, $nmax) = @_; - MogileFS::Config->set_server_setting('fsck_highest_fid_checked', $nmax); -} - -# this version is accurate, -sub should_be_running { - my $self = shift; - my $fhost = MogileFS::Config->server_setting('fsck_host') - or return; - return $fhost eq MogileFS::Config->hostname; -} - -# this version is sloppy, optimized for speed. only checks db every 5 seconds. -sub should_stop_running { - my $self = shift; - return 0 if $nowish < ($self->{last_stop_check} || 0) + 5; - $self->{last_stop_check} = $nowish; - return ! $self->should_be_running; -} - # given a $fid (MogileFS::FID, with pre-populated ->devids data) # return 0 if reachability problems. # return 1 if fid was checked (regardless of there being problems or not) @@ -172,9 +108,40 @@ sub check_fid { my ($self, $fid) = @_; my $fix = sub { - my $fixed = eval { $self->fix_fid($fid) }; + my ($reason, $recheck) = @_; + my $fixed; + + # we cached devids without locking for the fast path, + # ensure we get an up-to-date list in the slow path. 
+ $fid->forget_cached_devids; + + my $sto = Mgd::get_store(); + unless ($sto->should_begin_replicating_fidid($fid->id)) { + error("Fsck stalled for fid $fid: failed to acquire lock"); + return STALLED; + } + + unless ($fid->exists) { + # FID stopped existing while doing (or waiting on) + # the fast check, give up on this fid + $sto->note_done_replicating($fid->id); + return HANDLED; + } + + # we may have a lockless check which failed, retry the check + # with the lock and see if it succeeds here: + if ($recheck) { + $fixed = $recheck->(); + if (!$fixed) { + $fid->fsck_log($reason); + } + } + + $fixed ||= eval { $self->fix_fid($fid) }; + my $err = $@; + $sto->note_done_replicating($fid->id); if (! defined $fixed) { - error("Fsck stalled for fid $fid: $@"); + error("Fsck stalled for fid $fid: $err"); return STALLED; } $fid->fsck_log(EV_CANT_FIX) if ! $fixed; @@ -186,29 +153,27 @@ sub check_fid { # first obvious fucked-up case: no devids even presumed to exist. unless ($fid->devids) { - # first, log this weird condition. - $fid->fsck_log(EV_NO_PATHS); - - # weird, schedule a fix (which will do a search over all + # weird, recheck with a lock and then log it if it fails + # and attempt a fix (which will do a search over all # devices as a last-ditch effort to locate it) - return $fix->(); + return $fix->(EV_NO_PATHS, sub { $fid->devids }); } # first, see if the assumed devids meet the replication policy for # the fid's class. unless ($fid->devids_meet_policy) { - # log a policy violation - $fid->fsck_log(EV_POLICY_VIOLATION); - return $fix->(); + # recheck for policy violation under a lock, logging the violation + # if we failed. 
+ return $fix->(EV_POLICY_VIOLATION, sub { $fid->devids_meet_policy }); } # This is a simple fixup case - unless (scalar($fid->devids) == $fid->devcount) { - # log a bad count - $fid->fsck_log(EV_BAD_COUNT); + # If we got here, we already know we have no policy violation and + # don't need to call $fix->() to just fix a devcount + $self->maybe_fix_devcount($fid); - # TODO: We could fix this without a complete fix pass - # $fid->update_devcount(); + # missing checksum row + if ($fid->class->hashtype && ! $fid->checksum) { return $fix->(); } @@ -217,6 +182,10 @@ sub check_fid { # check the replication policy, which is already done, so finish. return HANDLED if $self->{opt_nostat}; + if ($self->{opt_checksum} && $self->{opt_checksum} ne "off") { + return $fix->(); + } + # stat each device to see if it's still there. on first problem, # stop and go into the slow(er) fix function. my $err; @@ -224,6 +193,12 @@ sub check_fid { my ($dfid, $disk_size) = @_; if (! defined $disk_size) { my $dev = $dfid->device; + # We end up checking is_perm_dead twice, but that's the way the + # flow goes... + if ($dev->dstate->is_perm_dead) { + $err = "needfix"; + return 0; + } error("Connectivity problem reaching device " . $dev->id . " on host " . $dev->host->ip . "\n"); $err = "stalled"; return 0; @@ -235,7 +210,8 @@ sub check_fid { }); if ($rv) { - return HANDLED; + return ($fid->class->hashtype && !($self->{opt_checksum} && $self->{opt_checksum} eq "off")) + ? 
$fix->() : HANDLED; } elsif ($err eq "stalled") { return STALLED; } elsif ($err eq "needfix") { @@ -245,15 +221,29 @@ sub check_fid { } } +# returns true if all size checks succeeded, false otherwise sub parallel_check_sizes { my ($self, $dflist, $cb) = @_; - # serial, for now: (just prepping for future parallel future, - # getting interface right) + my $expect = scalar @$dflist; + my ($good, $done) = (0, 0); + foreach my $df (@$dflist) { - my $size = $self->size_on_disk($df); - return 0 unless $cb->($df, $size); + $df->size_on_disk(sub { + my ($size) = @_; + $done++; + if ($cb->($df, $size)) { + $good++; + } else { + # use another timer to force PostLoopCallback to run + Danga::Socket->AddTimer(0, sub { $self->still_alive }); + } + }); } - return 1; + + Danga::Socket->SetPostLoopCallback(sub { $done != $expect }); + Danga::Socket->EventLoop; + + return $good == $expect; } # this is the slow path. if something above in check_fid finds @@ -265,10 +255,7 @@ sub parallel_check_sizes { use constant CANT_FIX => 0; sub fix_fid { my ($self, $fid) = @_; - debug(sprintf("Fixing FID %d\n", $fid->id)); - - # This should happen first, since the fid gets awkwardly reloaded... - $fid->update_devcount; + debug(sprintf("Fixing FID %d", $fid->id)); # make devfid objects from the devids that this fid is on, my @dfids = map { MogileFS::DevFID->new($_, $fid) } $fid->devids; @@ -278,6 +265,9 @@ sub fix_fid { my @good_devs; my @bad_devs; my %already_checked; # devid -> 1. + my $alg = $fid->class->hashname || $self->{opt_checksum}; + my $checksums = {}; + my $ping_cb = sub { $self->still_alive }; my $check_dfids = sub { my $is_desperate_mode = shift; @@ -287,10 +277,33 @@ sub fix_fid { my $dev = $dfid->device; next if $already_checked{$dev->id}++; - my $disk_size = $self->size_on_disk($dfid); + # Got a dead link, but reaper hasn't cleared it yet? + if ($dev->dstate->is_perm_dead) { + push @bad_devs, $dev; + next; + } + + my $disk_size = $dfid->size_on_disk; die "dev " . $dev->id . 
" unreachable" unless defined $disk_size; if ($disk_size == $fid->length) { + if ($alg && $alg ne "off") { + my $digest = $self->checksum_on_disk($dfid, $alg, $ping_cb); + unless (defined $digest) { + die "dev " . $dev->id . " unreachable"; + } + + # DELETE could've hit right after size check + if ($digest eq "-1") { + unless ($is_desperate_mode) { + $fid->fsck_log(EV_FILE_MISSING, $dev); + } + push @bad_devs, $dfid->device; + next; + } + push @{$checksums->{$digest} ||= []}, $dfid->device; + } + push @good_devs, $dfid->device; # if we were doing a desperate search, one is enough, we can stop now! return if $is_desperate_mode; @@ -300,7 +313,7 @@ sub fix_fid { # don't log in desperate mode, as we'd have "file missing!" log entries # for every device in the normal case, which is expected. unless ($is_desperate_mode) { - if (! $disk_size) { + if ($disk_size == -1) { $fid->fsck_log(EV_FILE_MISSING, $dev); } else { $fid->fsck_log(EV_BAD_LENGTH, $dev); @@ -322,123 +335,169 @@ sub fix_fid { @dfids = List::Util::shuffle( map { MogileFS::DevFID->new($_, $fid) } grep { $_->dstate->should_fsck_search_on } - MogileFS::Device->devices + Mgd::device_factory()->get_all ); $check_dfids->("desperate"); # still can't fix it? - return CANT_FIX unless @good_devs; + unless (@good_devs) { + $self->forget_bad_devs($fid, @bad_devs); + $fid->update_devcount; + return CANT_FIX; + } # wow, we actually found it! - $fid->fsck_log(EV_FOUND_FID); $fid->note_on_device($good_devs[0]); # at least one good one. + $fid->fsck_log(EV_FOUND_FID); # fall through to check policy (which will most likely be # wrong, with only one file_on record...) and re-replicate } - # remove the file_on mappings for devices that were bogus/missing. - foreach my $bdev (@bad_devs) { - error("removing file_on mapping for fid=" . $fid->id . ", dev=" . $bdev->id); - $fid->forget_about_device($bdev); - } - + $self->forget_bad_devs($fid, @bad_devs); # in case the devcount or similar was fixed. 
$fid->want_reload; + $self->fix_checksums($fid, $alg, $checksums) if $alg && $alg ne "off"; + # Note: this will reload devids, if they called 'note_on_device' # or 'forget_about_device' unless ($fid->devids_meet_policy) { - $fid->enqueue_for_replication; + $fid->enqueue_for_replication(in => 1); $fid->fsck_log(EV_RE_REPLICATE); return HANDLED; } # Clean up the device count if it's wrong - unless(scalar($fid->devids) == $fid->devcount) { - $fid->update_devcount(); - $fid->fsck_log(EV_BAD_COUNT); - } + $self->maybe_fix_devcount($fid); return HANDLED; } -sub init_size_checker { - my ($self, $fidlist) = @_; +sub forget_file_on_with_bad_checksums { + my ($self, $fid, $checksums) = @_; + foreach my $bdevs (values %$checksums) { + foreach my $bdev (@$bdevs) { + error("removing file_on mapping for fid=" . $fid->id . ", dev=" . $bdev->id); + $fid->forget_about_device($bdev); + } + } +} + +# returns -1 on missing, +# undef on connectivity error, +# else checksum of file on disk (after HTTP GET or mogstored read) +sub checksum_on_disk { + my ($self, $dfid, $alg, $ping_cb) = @_; + return $dfid->checksum_on_disk($alg, $ping_cb, "fsck"); +} - $self->still_alive; +sub bad_checksums_errmsg { + my ($self, $alg, $checksums) = @_; + my @err; - my $lo_fid = $fidlist->[0]->id; - my $hi_fid = $fidlist->[-1]->id; + foreach my $checksum (keys %$checksums) { + my $bdevs = join(",", map { $_->id } @{$checksums->{$checksum}}); + $checksum = unpack("H*", $checksum); + push @err, "$alg:$checksum on devids=[$bdevs]" + } - my %size; # $devid -> { $fid -> $size } - my %tried_bulkstat; # $devid -> 1 + return join('; ', @err); +} - $self->{size_checker} = sub { - my $dfid = shift; - my $devid = $dfid->devid; +# we don't now what checksum the file is supposed to be, but some +# of the devices had checksums that didn't match the other(s). 
+sub auto_checksums_bad { + my ($self, $fid, $checksums) = @_; + my $alg = $self->{opt_checksum}; + my $err = $self->bad_checksums_errmsg($alg, $checksums); - if (my $map = $size{$devid}) { - return $map->{$dfid->fidid} || 0; - } + error("$fid has multiple checksums: $err"); + $fid->fsck_log(EV_MULTI_CHECKSUM); +} - unless ($tried_bulkstat{$devid}++) { - my $mogconn = $dfid->device->host->mogstored_conn; - my $sock = $mogconn->sock(5); - my $good = 0; - my $unknown_cmd = 0; - if ($sock) { - my $cmd = "fid_sizes $lo_fid-$hi_fid $devid\n"; - print $sock $cmd; - my $map = {}; - while (my $line = <$sock>) { - if ($line =~ /^\./) { - $good = 1; - last; - } elsif ($line =~ /^(\d+)\s+(\d+)\s+(\d+)/) { - my ($res_devid, $res_fid, $size) = ($1, $2, $3); - last unless $res_devid == $devid; - $map->{$res_fid} = $size; - } elsif ($line =~ /^ERR/) { - $unknown_cmd = 1; - last; - } else { - last; - } - } +sub all_checksums_bad { + my ($self, $fid, $checksums) = @_; + my $alg = $fid->class->hashname or return; # class could've changed + my $cur_checksum = $fid->checksum; + my $err = $self->bad_checksums_errmsg($alg, $checksums); + my $cur = $cur_checksum ? "Expected: $cur_checksum" + : "No known valid checksum"; + error("all checksums bad: $err. $cur"); + $fid->fsck_log(EV_BAD_CHECKSUM); +} - # we only update our $nowish (approximate time) lazily, when we - # know time might've advanced (like during potentially slow RPC call) - $nowish = $self->still_alive; - - if ($good) { - $size{$devid} = $map; - return $map->{$dfid->fidid} || 0; - } elsif (!$unknown_cmd) { - # mogstored connection is unknown state... can't - # trust it, so close it. - $mogconn->mark_dead; +sub fix_checksums { + my ($self, $fid, $alg, $checksums) = @_; + my $cur_checksum = $fid->checksum; + my @all_checksums = keys(%$checksums); + + if (scalar(@all_checksums) == 1) { # all checksums match, good! 
+ my $disk_checksum = $all_checksums[0]; + if ($cur_checksum) { + if ($cur_checksum->{checksum} ne $disk_checksum) { + my $expect = $cur_checksum->info; + my $actual = "$alg:" . unpack("H*", $disk_checksum); + error("$cur_checksum does not match disk: $actual"); + if ($alg ne $cur_checksum->hashname) { + $fid->fsck_log(EV_BAD_HASHTYPE); + } else { + $fid->fsck_log(EV_BAD_CHECKSUM); } } - error("fid_sizes mogstored cmd unavailable for dev $devid; using slower method"); + } else { # fresh row to checksum + my $hashtype = $fid->class->hashtype; + + # we store this in the database + if ($hashtype) { + my %row = ( + fid => $fid->id, + checksum => $disk_checksum, + hashtype => $hashtype, + ); + my $new_checksum = MogileFS::Checksum->new(\%row); + debug("creating new checksum=$new_checksum"); + $fid->fsck_log(EV_NO_CHECKSUM); + $new_checksum->save; + } else { + my $hex_checksum = unpack("H*", $disk_checksum); + my $alg = $self->{opt_checksum}; + debug("fsck_checksum=auto good: $fid $alg:$hex_checksum"); + } } + } elsif ($cur_checksum) { + my $good = delete($checksums->{$cur_checksum->{checksum}}); + if ($good && (scalar(@$good) > 0)) { + $self->forget_file_on_with_bad_checksums($fid, $checksums); + # will fail $fid->devids_meet_policy and re-replicate + } else { + $self->all_checksums_bad($fid, $checksums); + } + } elsif ($self->{opt_checksum}) { + $self->auto_checksums_bad($fid, $checksums); + } else { + $self->all_checksums_bad($fid, $checksums); + } +} - # slow case (not using new command) - $nowish = $self->still_alive; - return $dfid->size_on_disk; - }; +# remove the file_on mappings for devices that were bogus/missing. +sub forget_bad_devs { + my ($self, $fid, @bad_devs) = @_; + foreach my $bdev (@bad_devs) { + error("removing file_on mapping for fid=" . $fid->id . ", dev=" . 
$bdev->id); + $fid->forget_about_device($bdev); + } } -# returns 0 on missing, -# undef on connectivity error, -# else size of file on disk (after HTTP HEAD or mogstored stat) -sub size_on_disk { - my ($self, $dfid) = @_; - return $dfid->size_on_disk; - # Mass checker is disabled for now... doesn't run on our production - # hosts due to massive gaps in the fids. Instead we make the process - # parallel and will rework it later. - #return $self->{size_checker}->($dfid); +sub maybe_fix_devcount { + # don't even log BCNT errors if skip_devcount is enabled + return if MogileFS::Config->server_setting_cached('skip_devcount'); + + my ($self, $fid) = @_; + return if scalar($fid->devids) == $fid->devcount; + # log a bad count + $fid->fsck_log(EV_BAD_COUNT); + $fid->update_devcount(); } 1; diff --git a/lib/MogileFS/Worker/JobMaster.pm b/lib/MogileFS/Worker/JobMaster.pm index 8fc0e29c..d70eb395 100644 --- a/lib/MogileFS/Worker/JobMaster.pm +++ b/lib/MogileFS/Worker/JobMaster.pm @@ -12,8 +12,9 @@ use fields ( 'dele_queue_limit', 'rebl_queue_limit', ); -use MogileFS::Util qw(every error debug eurl); +use MogileFS::Util qw(every error debug encode_url_args); use MogileFS::Config; +use MogileFS::Server; use constant DEF_FSCK_QUEUE_MAX => 20_000; use constant DEF_FSCK_QUEUE_INJECT => 1000; @@ -45,16 +46,30 @@ sub work { $self->{dele_queue_limit} = 100; $self->{rebl_queue_limit} = 100; - every(1, sub { - # 'pings' parent and populates all queues. + Danga::Socket->AddOtherFds($self->psock_fd, sub{ $self->read_from_parent }); + + # kick off the initial run + $self->check_queues; + Danga::Socket->EventLoop; +} + +# 'pings' parent and populates all queues. 
+sub check_queues { + my $self = shift; + + my $active = 0; + if ($self->validate_dbh) { $self->send_to_parent("queue_depth all"); my $sto = Mgd::get_store(); - $self->read_from_parent(1); - $self->_check_replicate_queues($sto); - $self->_check_delete_queues($sto); - $self->_check_fsck_queues($sto); - $self->_check_rebal_queues($sto); - }); + $self->parent_ping; + $active += $self->_check_replicate_queues($sto); + $active += $self->_check_delete_queues($sto); + $active += $self->_check_fsck_queues($sto); + $active += $self->_check_rebal_queues($sto); + } + + # don't sleep if active (just avoid recursion) + Danga::Socket->AddTimer($active ? 0 : 1, sub { $self->check_queues }); } sub _check_delete_queues { @@ -69,8 +84,9 @@ sub _check_delete_queues { return unless @to_del; for my $todo (@to_del) { $self->send_to_parent("queue_todo delete " . - _eurl_encode_args($todo)); + encode_url_args($todo)); } + return 1; } # NOTE: we only maintain one queue per worker, but we can easily @@ -97,8 +113,9 @@ sub _check_replicate_queues { for my $todo (@to_repl) { $todo->{_type} = 'replicate'; # could be 'drain', etc. $self->send_to_parent("queue_todo replicate " . - _eurl_encode_args($todo)); + encode_url_args($todo)); } + return 1; } # FSCK is going to be a little odd... We still need a single "global" @@ -106,7 +123,7 @@ sub _check_replicate_queues { sub _check_fsck_queues { my $self = shift; my $sto = shift; - my $fhost = MogileFS::Config->server_setting('fsck_host'); + my $fhost = MogileFS::Config->server_setting_cached('fsck_host'); if ($fhost && $fhost eq MogileFS::Config->hostname) { $self->_inject_fsck_queues($sto); } @@ -125,8 +142,9 @@ sub _check_fsck_queues { $self->{fsck_queue_limit} = @to_fsck ? $new_limit : 100; return unless @to_fsck; for my $todo (@to_fsck) { - $self->send_to_parent("queue_todo fsck " . _eurl_encode_args($todo)); + $self->send_to_parent("queue_todo fsck " . 
encode_url_args($todo)); } + return 1; } sub _inject_fsck_queues { @@ -136,20 +154,24 @@ sub _inject_fsck_queues { $sto->fsck_log_summarize; my $queue_size = $sto->file_queue_length(FSCK_QUEUE); my $max_queue = - MogileFS::Config->server_setting_cached('queue_size_for_fsck', 60) || + MogileFS::Config->server_setting_cached('queue_size_for_fsck') || DEF_FSCK_QUEUE_MAX; return if ($queue_size >= $max_queue); my $max_checked = MogileFS::Config->server_setting('fsck_highest_fid_checked') || 0; + my $fid_at_end = MogileFS::Config->server_setting('fsck_fid_at_end'); my $to_inject = - MogileFS::Config->server_setting_cached('queue_rate_for_fsck', 60) || + MogileFS::Config->server_setting_cached('queue_rate_for_fsck') || DEF_FSCK_QUEUE_INJECT; - my $fids = $sto->get_fidids_above_id($max_checked, $to_inject); + my $fids = $sto->get_fidids_between($max_checked, $fid_at_end, $to_inject); unless (@$fids) { - $sto->set_server_setting("fsck_host", undef); - $sto->set_server_setting("fsck_stop_time", $sto->get_db_unixtime); MogileFS::Config->set_server_setting('fsck_highest_fid_checked', $max_checked); + + # set these last since tests/scripts may rely on these to + # determine when fsck (injection) is complete + $sto->set_server_setting("fsck_host", undef); + $sto->set_server_setting("fsck_stop_time", $sto->get_db_unixtime); return; } @@ -162,7 +184,7 @@ sub _inject_fsck_queues { sub _check_rebal_queues { my $self = shift; my $sto = shift; - my $rhost = MogileFS::Config->server_setting('rebal_host'); + my $rhost = MogileFS::Config->server_setting_cached('rebal_host'); if ($rhost && $rhost eq MogileFS::Config->hostname) { $self->_inject_rebalance_queues($sto); } @@ -177,8 +199,9 @@ sub _check_rebal_queues { return unless @to_rebal; for my $todo (@to_rebal) { $todo->{_type} = 'rebalance'; - $self->send_to_parent("queue_todo rebalance " . _eurl_encode_args($todo)); + $self->send_to_parent("queue_todo rebalance " . 
encode_url_args($todo)); } + return 1; } sub _inject_rebalance_queues { @@ -187,29 +210,41 @@ sub _inject_rebalance_queues { my $queue_size = $sto->file_queue_length(REBAL_QUEUE); my $max_queue = - MogileFS::Config->server_setting_cached('queue_size_for_rebal', 60) || + MogileFS::Config->server_setting_cached('queue_size_for_rebal') || DEF_REBAL_QUEUE_MAX; return if ($queue_size >= $max_queue); my $to_inject = - MogileFS::Config->server_setting_cached('queue_rate_for_rebal', 60) || + MogileFS::Config->server_setting_cached('queue_rate_for_rebal') || DEF_REBAL_QUEUE_INJECT; # TODO: Cache the rebal object. Requires explicitly blowing it up at the # end of a run or ... I guess whenever the host sees it's not the rebal # host. my $rebal = MogileFS::Rebalance->new; + my $signal = MogileFS::Config->server_setting('rebal_signal'); my $rebal_pol = MogileFS::Config->server_setting('rebal_policy'); my $rebal_state = MogileFS::Config->server_setting('rebal_state'); $rebal->policy($rebal_pol); - my @devs = MogileFS::Device->devices; + my @devs = Mgd::device_factory()->get_all; if ($rebal_state) { $rebal->load_state($rebal_state); } else { $rebal->init(\@devs); } + # Stopping is done via signal so we can note stop time in the state, + # and un-drain any devices that should be un-drained. + if ($signal && $signal eq 'stop') { + $rebal->stop; + $rebal_state = $rebal->save_state; + $sto->set_server_setting('rebal_signal', undef); + $sto->set_server_setting("rebal_host", undef); + $sto->set_server_setting('rebal_state', $rebal_state); + return; + } + my $devfids = $rebal->next_fids_to_rebalance(\@devs, $sto, $to_inject); # undefined means there's no work left. @@ -217,6 +252,9 @@ sub _inject_rebalance_queues { # Append some info to a rebalance log table? # Leave state in the system for inspection post-run. # TODO: Emit some sort of syslog/status line. 
+ $rebal->finish; + $rebal_state = $rebal->save_state; + $sto->set_server_setting('rebal_state', $rebal_state); $sto->set_server_setting("rebal_host", undef); return; } @@ -239,7 +277,7 @@ sub _inject_rebalance_queues { # fast trying to keep the queue full. sub queue_depth_check { my $max_limit = - MogileFS::Config->server_setting_cached('internal_queue_limit', 120) + MogileFS::Config->server_setting_cached('internal_queue_limit') || 500; my ($depth, $limit) = @_; @@ -252,12 +290,6 @@ sub queue_depth_check { return (0, $limit); } -# TODO: Move this into Util.pm? -sub _eurl_encode_args { - my $args = shift; - return join('&', map { eurl($_) . "=" . eurl($args->{$_}) } keys %$args); -} - 1; # Local Variables: diff --git a/lib/MogileFS/Worker/Monitor.pm b/lib/MogileFS/Worker/Monitor.pm index 073a1c81..912822d4 100644 --- a/lib/MogileFS/Worker/Monitor.pm +++ b/lib/MogileFS/Worker/Monitor.pm @@ -4,18 +4,32 @@ use warnings; use base 'MogileFS::Worker'; use fields ( - 'last_db_update', # devid -> time. update db less often than poll interval. 'last_test_write', # devid -> time. time we last tried writing to a device. + 'monitor_start', # main monitor start time 'skip_host', # hostid -> 1 if already noted dead (reset every loop) 'seen_hosts', # IP -> 1 (reset every loop) - 'ua', # LWP::UserAgent for checking usage files 'iow', # MogileFS::IOStatWatcher object + 'prev_data', # DB data from previous run + 'devutil', # Running tally of device utilization + 'events', # Queue of state events + 'refresh_state', # devid -> { used, total, callbacks }, temporary data in each refresh run + 'have_masterdb', # Hint flag for if the master DB is available + 'updateable_devices', # devid -> Device, avoids device table updates + 'parent', # socketpair to parent process + 'refresh_pending', # set if there was a manually-requested refresh + 'db_monitor_ran', # We announce "monitor_just_ran" every time the + # device checks are run, but only if the DB has + # been checked inbetween. 
+ 'devs_to_update' # device table update queue ); use Danga::Socket 1.56; use MogileFS::Config; -use MogileFS::Util qw(error debug); +use MogileFS::Util qw(error debug encode_url_args apply_state_events_list); use MogileFS::IOStatWatcher; +use MogileFS::Server; +use MogileFS::Connection::Parent; +use Digest::MD5 qw(md5_base64); use constant UPDATE_DB_EVERY => 15; @@ -24,9 +38,13 @@ sub new { my $self = fields::new($class); $self->SUPER::new($psock); - $self->{last_db_update} = {}; $self->{last_test_write} = {}; $self->{iow} = MogileFS::IOStatWatcher->new; + $self->{prev_data} = { domain => {}, class => {}, host => {}, + device => {} }; + $self->{devutil} = { cur => {}, prev => {}, tmp => {} }; + $self->{events} = []; + $self->{have_masterdb} = 0; return $self; } @@ -34,12 +52,147 @@ sub watchdog_timeout { 30; } +# returns 1 if a DB update was attempted +# returns 0 immediately if the (device) monitor is already running +sub cache_refresh { + my $self = shift; + + if ($self->{refresh_state}) { + debug("Monitor run in progress, will not check for DB updates"); + return 0; + } + + debug("Monitor running; checking DB for updates"); + # "Fix" our local cache of this flag, so we always check the master DB. 
+ MogileFS::Config->cache_server_setting('_master_db_alive', 1); + my $have_dbh = $self->validate_dbh; + if ($have_dbh && !$self->{have_masterdb}) { + $self->{have_masterdb} = 1; + $self->set_event('srvset', '_master_db_alive', { value => 1 }); + } elsif (!$have_dbh) { + $self->{have_masterdb} = 0; + $self->set_event('srvset', '_master_db_alive', { value => 0 }); + error("Cannot connect to master database!"); + } + + if ($have_dbh) { + my $db_data = $self->grab_all_data; + + # Stack diffs to ship back later + $self->diff_data($db_data); + } + + $self->send_events_to_parent; + $self->{db_monitor_ran} = 1; + + return 1; +} + +sub usage_refresh { + my ($self) = @_; + + # prevent concurrent refresh + return if $self->{refresh_state}; + + debug("Monitor running; scanning usage files"); + + $self->{refresh_state} = {}; # devid -> ... + $self->{monitor_start} = Time::HiRes::time(); + + my $have_dbh = $self->validate_dbh; + + # See if we should be allowed to update the device table rows. + if ($have_dbh && Mgd::get_store()->get_lock('mgfs:device_update', 0)) { + # Fetch the freshlist list of entries, to avoid excessive writes. + $self->{updateable_devices} = { map { $_->{devid} => $_ } + Mgd::get_store()->get_all_devices }; + $self->{devs_to_update} = []; + } else { + $self->{updateable_devices} = undef; + } + + $self->{skip_host} = {}; # hostid -> 1 if already noted dead. + $self->{seen_hosts} = {}; # IP -> 1 + + my $dev_factory = MogileFS::Factory::Device->get_factory(); + my $devutil = $self->{devutil}; + + $devutil->{tmp} = {}; + # kick off check_device to test host/devs. diff against old values. 
+ for my $dev ($dev_factory->get_all) { + if (my $state = $self->is_iow_diff($dev)) { + $self->state_event('device', $dev->id, {utilization => $state}); + } + $devutil->{tmp}->{$dev->id} = $devutil->{cur}->{$dev->id}; + + $dev->can_read_from or next; + $self->check_device_begin($dev); + } + # we're done if we didn't schedule any work + $self->usage_refresh_done unless keys %{$self->{refresh_state}}; +} + +sub usage_refresh_done { + my ($self) = @_; + + $self->{devutil}->{prev} = $self->{devutil}->{tmp}; + # Set the IOWatcher hosts (once old monitor code has been disabled) + + $self->send_events_to_parent; + + $self->{iow}->set_hosts(keys %{$self->{seen_hosts}}); + + foreach my $devid (keys %{$self->{refresh_state}}) { + error("device check incomplete for dev$devid"); + } + + my $start = delete $self->{monitor_start}; + my $elapsed = Time::HiRes::time() - $start; + debug("device refresh finished after $elapsed"); + + $self->{refresh_state} = undef; + my $pending_since = $self->{refresh_pending}; + + # schedule another usage_refresh immediately if somebody requested it + # Don't announce :monitor_just_ran if somebody requested a refresh + # while we were running, we could've been refreshing on a stale DB + if ($pending_since && $pending_since > $start) { + # using AddTimer to schedule the refresh to avoid stack overflow + # since usage_refresh can call usage_refresh_done directly if + # there are no devices + Danga::Socket->AddTimer(0, sub { + $self->cache_refresh; + $self->usage_refresh; + }); + } + + # announce we're done if we ran on schedule, or we had a + # forced refresh that was requested before we started. 
+ if (!$pending_since || $pending_since <= $start) { + # totally done refreshing, accept manual refresh requests again + $self->{parent}->watch_read(1); + delete $self->{refresh_pending}; + if (delete $self->{db_monitor_ran} || $pending_since) { + $self->send_to_parent(":monitor_just_ran"); + } + } + + if ($self->{updateable_devices}) { + my $sto = Mgd::get_store(); + my $updates = delete $self->{devs_to_update}; + $sto->update_device_usages($updates, sub { $self->still_alive }); + $sto->release_lock('mgfs:device_update'); + $self->{updateable_devices} = undef; + } +} + sub work { my $self = shift; - # we just forked from our parent process, also using Danga::Socket, - # so we need to lose all that state and start afresh. - Danga::Socket->Reset; + # It makes sense to have monitor use a shorter timeout + # (conn_timeout) across the board to skip slow hosts. Other workers + # are less tolerant, and may use a higher value in node_timeout. + MogileFS::Config->set_config_no_broadcast("node_timeout", MogileFS::Config->config("conn_timeout")); my $iow = $self->{iow}; $iow->on_stats(sub { @@ -47,100 +200,197 @@ sub work { while (my ($devid, $util) = each %$stats) { # Lets not propagate devices that we accidentally find. - # This does hit the DB every time a device does not exist, so - # perhaps should add negative caching in the future. - my $dev = MogileFS::Device->of_devid($devid); - next unless $dev->exists; - $dev->set_observed_utilization($util); + my $dev = Mgd::device_factory()->get_by_id($devid); + next unless $dev; + $self->{devutil}->{cur}->{$devid} = $util; } }); - my $main_monitor; - $main_monitor = sub { - $self->parent_ping; - - # get db and note we're starting a run - debug("Monitor running; scanning usage files"); - $self->validate_dbh; - - $self->{skip_host} = {}; # hostid -> 1 if already noted dead. 
- $self->{seen_hosts} = {}; # IP -> 1 + my $db_monitor; + $db_monitor = sub { + $self->still_alive; - # now iterate over devices - MogileFS::Device->invalidate_cache; - MogileFS::Host->invalidate_cache; - - foreach my $dev (MogileFS::Device->devices) { - next unless $dev->dstate->should_monitor; - next if $self->{skip_host}{$dev->hostid}; - $self->check_device($dev); + # reschedule immediately if we were blocked by main_monitor. + # setting refresh_pending will call cache_refresh again + if (!$self->cache_refresh) { + $self->{refresh_pending} ||= Time::HiRes::time(); } - $iow->set_hosts(keys %{$self->{seen_hosts}}); - $self->send_to_parent(":monitor_just_ran"); + # always reschedule in 4 seconds, regardless + Danga::Socket->AddTimer(4, $db_monitor); + }; + + $db_monitor->(); + $self->read_from_parent; - # Make sure we sleep for at least 2.5 seconds before running again. - # If there's a die above, the monitor will be restarted. + my $main_monitor; + $main_monitor = sub { + $self->{parent}->ping; + $self->usage_refresh; Danga::Socket->AddTimer(2.5, $main_monitor); }; - $main_monitor->(); + $self->parent_ping; # ensure we get the initial DB state back + $self->{parent} = MogileFS::Connection::Parent->new($self); + Danga::Socket->AddTimer(0, $main_monitor); Danga::Socket->EventLoop; } +sub process_line { + my MogileFS::Worker::Monitor $self = shift; + my $lineref = shift; + if ($$lineref =~ /^:refresh_monitor$/) { + if ($self->cache_refresh) { + $self->usage_refresh; + } else { + $self->{refresh_pending} ||= Time::HiRes::time(); + } + # try to stop processing further refresh_monitor requests + # if we're acting on a manual refresh + $self->{parent}->watch_read(0); + return 1; + } + return 0; +} + # -------------------------------------------------------------------------- -sub ua { +# Flattens and flips events up to the parent. Can be huge on startup! 
+# Events: set type foo=bar&baz=quux +# remove type id +# setstate type id foo=bar&baz=quux +# Combined: ev_mode=set&ev_type=device&foo=bar +# ev_mode=setstate&ev_type=device&ev_id=1&foo=bar +sub send_events_to_parent { my $self = shift; - return $self->{ua} ||= LWP::UserAgent->new( - timeout => MogileFS::Config->config('conn_timeout') || 2, - keep_alive => 20, - ); + my @flat = (); + for my $ev (@{$self->{events}}) { + my ($mode, $type, $args) = @$ev; + $args->{ev_mode} = $mode; + $args->{ev_type} = $type; + push(@flat, encode_url_args($args)); + } + return unless @flat; + $self->{events} = []; + + { + # $events can be several MB, so let it go out-of-scope soon: + my $events = join(' ', ':monitor_events', @flat); + debug("sending state changes $events", 2); + $self->send_to_parent($events); + } + + apply_state_events_list(@flat); } -sub check_device { - my ($self, $dev) = @_; +sub add_event { + push(@{$_[0]->{events}}, $_[1]); +} +sub set_event { + # Allow callers to use shorthand + $_[3]->{ev_id} = $_[2]; + $_[0]->add_event(['set', $_[1], $_[3]]); +} +sub remove_event { $_[0]->add_event(['remove', $_[1], { ev_id => $_[2] }]); } +sub state_event { + $_[3]->{ev_id} = $_[2]; + $_[0]->add_event(['setstate', $_[1], $_[3]]); +} + +sub is_iow_diff { + my ($self, $dev) = @_; my $devid = $dev->id; - my $host = $dev->host; + my $p = $self->{devutil}->{prev}->{$devid}; + my $c = $self->{devutil}->{cur}->{$devid}; + if ( ! defined $p || $p ne $c ) { + return $c; + } + return undef; +} - my $port = $host->http_port; - my $get_port = $host->http_get_port; # || $port; - my $hostip = $host->ip; - my $url = $dev->usage_url; +sub diff_data { + my ($self, $db_data) = @_; + + my $new_data = {}; + my $prev_data = $self->{prev_data}; + for my $type (keys %{$db_data}) { + my $d_data = $db_data->{$type}; + my $p_data = $prev_data->{$type}; + my $n_data = {}; + + for my $item (@{$d_data}) { + my $id = $type eq 'domain' ? $item->{dmid} + : $type eq 'class' ? $item->{dmid} . '-' . 
$item->{classid} + : $type eq 'host' ? $item->{hostid} + : $type eq 'device' ? $item->{devid} + : $type eq 'srvset' ? $item->{field} + : die "Unknown type"; + my $old = delete $p_data->{$id}; + # Special case: for devices, we don't care if mb_asof changes. + # FIXME: Change the grab routine (or filter there?). + delete $item->{mb_asof} if $type eq 'device'; + if (!$old || $self->diff_hash($old, $item)) { + $self->set_event($type, $id, { %$item }); + } + $n_data->{$id} = $item; + } + for my $id (keys %{$p_data}) { + $self->remove_event($type, $id); + } - $self->{seen_hosts}{$hostip} = 1; + $new_data->{$type} = $n_data; + } + $self->{prev_data} = $new_data; +} - # now try to get the data with a short timeout - my $timeout = MogileFS::Config->config('conn_timeout') || 2; - my $start_time = Time::HiRes::time(); +# returns 1 if the hashes are different. +sub diff_hash { + my ($self, $old, $new) = @_; + + my %keys = (); + map { $keys{$_}++ } keys %$old, keys %$new; + for my $k (keys %keys) { + return 1 if (exists $old->{$k} && ! exists $new->{$k}); + return 1 if (exists $new->{$k} && ! exists $old->{$k}); + return 1 if (defined $old->{$k} && ! defined $new->{$k}); + return 1 if (defined $new->{$k} && ! defined $old->{$k}); + next if (! defined $new->{$k} && ! defined $old->{$k}); + return 1 if ($old->{$k} ne $new->{$k}); + } + return 0; +} - my $ua = $self->ua; - my $response = $ua->get($url); - my $res_time = Time::HiRes::time(); - - $hostip ||= 'unknown'; - $get_port ||= 'unknown'; - $devid ||= 'unknown'; - $timeout ||= 'unknown'; - $url ||= 'unknown'; - unless ($response->is_success) { - my $failed_after = $res_time - $start_time; - if ($failed_after < 0.5) { - $self->broadcast_device_unreachable($dev->id); - error("Port $get_port not listening on $hostip ($url)? Error was: " . 
$response->status_line); - } else { - $failed_after = sprintf("%.02f", $failed_after); - $self->broadcast_host_unreachable($dev->hostid); - $self->{skip_host}{$dev->hostid} = 1; - error("Timeout contacting $hostip dev $devid ($url): took $failed_after seconds out of $timeout allowed"); - } - return; +sub grab_all_data { + my $self = shift; + my $sto = Mgd::get_store(); + + # Normalize the domain data to the rest to simplify the differ. + # FIXME: Once new objects are swapped in, fix the original + my %dom = $sto->get_all_domains; + my @fixed_dom = (); + while (my ($name, $id) = each %dom) { + push(@fixed_dom, { namespace => $name, dmid => $id }); } - # at this point we can reach the host - $self->broadcast_host_reachable($dev->hostid); - $self->{iow}->restart_monitoring_if_needed($hostip); + my $set = $sto->server_settings; + my @fixed_set = (); + while (my ($field, $value) = each %$set) { + push(@fixed_set, { field => $field, value => $value }); + } + + my %ret = ( domain => \@fixed_dom, + class => [$sto->get_all_classes], + host => [$sto->get_all_hosts], + device => [$sto->get_all_devices], + srvset => \@fixed_set, ); + return \%ret; +} + +# returns true on success, false on failure +sub check_usage_response { + my ($self, $dev, $response) = @_; + my $devid = $dev->id; my %stats; my $data = $response->content; @@ -155,59 +405,243 @@ sub check_device { $total = "" unless defined $total; my $clen = length($data || ""); error("dev$devid reports used = $used, total = $total, content-length: $clen, error?"); - return; + return 0; } + my $rstate = $self->{refresh_state}->{$devid}; + ($rstate->{used}, $rstate->{total}) = ($used, $total); + # only update database every ~15 seconds per device - my $last_update = $self->{last_db_update}{$dev->id} || 0; - my $next_update = $last_update + UPDATE_DB_EVERY; - my $now = time(); - if ($now >= $next_update) { - Mgd::get_store()->update_device_usage(mb_total => int($total / 1024), - mb_used => int($used / 1024), - devid => 
$devid); - $self->{last_db_update}{$devid} = $now; - } - - # next if we're not going to try this now - return if ($self->{last_test_write}{$devid} || 0) + UPDATE_DB_EVERY > $now; - $self->{last_test_write}{$devid} = $now; - - # now we want to check if this device is writeable - - # first, create the test-write directory. this will return - # immediately after the first time, as the 'create_directory' - # function caches what it's already created. - $dev->create_directory("/dev$devid/test-write"); - - my $num = int(rand 100); # this was "$$-$now" before, but we don't yet have a cleaner in mogstored for these files - my $puturl = "http://$hostip:$port/dev$devid/test-write/test-write-$num"; - my $content = "time=$now rand=$num"; - my $req = HTTP::Request->new(PUT => $puturl); - $req->content($content); - - # TODO: guard against race-conditions with double-check on failure - - # now, depending on what happens - my $resp = $ua->request($req); - if ($resp->is_success) { - # now let's get it back to verify; note we use the get_port to verify that - # the distinction works (if we have one) - my $geturl = "http://$hostip:$get_port/dev$devid/test-write/test-write-$num"; - my $testwrite = $ua->get($geturl); - - # if success and the content matches, mark it writeable - if ($testwrite->is_success && $testwrite->content eq $content) { - $self->broadcast_device_writeable($devid); - debug("dev$devid: used = $used, total = $total, writeable = 1"); - return; + if ($self->{updateable_devices}) { + my $devrow = $self->{updateable_devices}->{$devid}; + my $last = ($devrow && $devrow->{mb_asof}) ? 
$devrow->{mb_asof} : 0; + my $now = time(); + if ($last + UPDATE_DB_EVERY < $now) { + my %upd = (mb_total => int($total / 1024), + mb_used => int($used / 1024), + mb_asof => $now, + devid => $devid); + push @{$self->{devs_to_update}}, \%upd; } } + return 1; +} + +sub dev_debug { + my ($self, $dev, $writable) = @_; + return unless $Mgd::DEBUG >= 1; + my $devid = $dev->id; + my $rstate = $self->{refresh_state}->{$devid}; + my ($used, $total) = ($rstate->{used}, $rstate->{total}); + + debug("dev$devid: used = $used, total = $total, writeable = $writable"); +} + +sub check_write { + my ($self, $dev) = @_; + my $rstate = $self->{refresh_state}->{$dev->id}; + my $test_write = $rstate->{test_write}; + + if (!$test_write || $test_write->{tries} > 0) { + # this was "$$-$now" before, but we don't yet have a cleaner in + # mogstored for these files + my $num = int(rand 100); + $test_write = $rstate->{test_write} ||= {}; + $test_write->{path} = "/dev${\$dev->id}/test-write/test-write-$num"; + $test_write->{content} = "time=" . time . 
" rand=$num"; + $test_write->{tries} ||= 2; + } + $test_write->{tries}--; + + my $opts = { content => $test_write->{content} }; + $dev->host->http("PUT", $test_write->{path}, $opts, sub { + my ($response) = @_; + $self->on_check_write_response($dev, $response); + }); +} + +# starts the lengthy device check process +sub check_device_begin { + my ($self, $dev) = @_; + $self->{refresh_state}->{$dev->id} = {}; + + $self->check_device($dev); +} + +# the lengthy device check process +sub check_device { + my ($self, $dev) = @_; + return $self->check_device_done($dev) if $self->{skip_host}{$dev->hostid}; + + my $devid = $dev->id; + my $url = $dev->usage_url; + my $host = $dev->host; + + $self->{seen_hosts}{$host->ip} = 1; + + # now try to get the data with a short timeout + my $start_time = Time::HiRes::time(); + $host->http_get("GET", $dev->usage_url, undef, sub { + my ($response) = @_; + if (!$self->on_usage_response($dev, $response, $start_time)) { + return $self->check_device_done($dev); + } + # next if we're not going to try this now + my $now = time(); + if (($self->{last_test_write}{$devid} || 0) + UPDATE_DB_EVERY > $now) { + return $self->check_device_done($dev); + } + $self->{last_test_write}{$devid} = $now; + + unless ($dev->can_delete_from) { + # we should not try to write on readonly devices because it can be + # mounted as RO. + return $self->dev_observed_readonly($dev); + } + # now we want to check if this device is writeable + + # first, create the test-write directory. this will return + # immediately after the first time, as the 'create_directory' + # function caches what it's already created. 
+ $dev->create_directory("/dev$devid/test-write", sub { + $self->check_write($dev); + }); + }); +} + +# called on a successful PUT, ensure the data we get back is what we uploaded +sub check_reread { + my ($self, $dev) = @_; + # now let's get it back to verify; note we use the get_port to + # verify that the distinction works (if we have one) + my $test_write = $self->{refresh_state}->{$dev->id}->{test_write}; + $dev->host->http_get("GET", $test_write->{path}, undef, sub { + my ($response) = @_; + $self->on_check_reread_response($dev, $response); + }); +} - # if we fall through to here, then we know that something is not so good, so mark it readable - # which is guaranteed given we even tested writeability - $self->broadcast_device_readable($devid); - debug("dev$devid: used = $used, total = $total, writeable = 0"); +sub on_check_reread_response { + my ($self, $dev, $response) = @_; + my $test_write = $self->{refresh_state}->{$dev->id}->{test_write}; + + # if success and the content matches, mark it writeable + if ($response->is_success) { + if ($response->content eq $test_write->{content}) { + if (!$dev->observed_writeable) { + my $event = { observed_state => 'writeable' }; + $self->state_event('device', $dev->id, $event); + } + $self->dev_debug($dev, 1); + + return $self->check_bogus_md5($dev); # onto the final check... 
+ } + + # content didn't match due to race, retry and hope we're lucky + return $self->check_write($dev) if ($test_write->{tries} > 0); + } + + return $self->dev_observed_readonly($dev); # it's read-only at least +} + +sub on_check_write_response { + my ($self, $dev, $response) = @_; + return $self->check_reread($dev) if $response->is_success; + return $self->dev_observed_readonly($dev); +} + +# returns true on success, false on failure +sub on_usage_response { + my ($self, $dev, $response, $start_time) = @_; + my $host = $dev->host; + my $hostip = $host->ip; + + if ($response->is_success) { + # at this point we can reach the host + if (!$host->observed_reachable) { + my $event = { observed_state => 'reachable' }; + $self->state_event('host', $dev->hostid, $event); + } + $self->{iow}->restart_monitoring_if_needed($hostip); + + return $self->check_usage_response($dev, $response); + } + + my $url = $dev->usage_url; + my $failed_after = Time::HiRes::time() - $start_time; + if ($failed_after < 0.5) { + if (!$dev->observed_unreachable) { + my $event = { observed_state => 'unreachable' }; + $self->state_event('device', $dev->id, $event); + } + my $get_port = $host->http_get_port; + error("Port $get_port not listening on $hostip ($url)? Error was: " . $response->status_line); + } else { + $failed_after = sprintf("%.02f", $failed_after); + if (!$host->observed_unreachable) { + my $event = { observed_state => 'unreachable' }; + $self->state_event('host', $dev->hostid, $event); + } + $self->{skip_host}{$dev->hostid} = 1; + } + return 0; # failure +} + +sub check_bogus_md5 { + my ($self, $dev) = @_; + my $put_path = "/dev${\$dev->id}/test-write/test-md5"; + my $opts = { + headers => { "Content-MD5" => md5_base64("!") . "==", }, + content => '.', + }; + + # success is bad here, it means the server doesn't understand how to + # verify and reject corrupt bodies from Content-MD5 headers. 
+ # most servers /will/ succeed here :< + $dev->host->http("PUT", $put_path, $opts, sub { + my ($response) = @_; + $self->on_bogus_md5_response($dev, $response); + }); +} + +sub on_bogus_md5_response { + my ($self, $dev, $response) = @_; + my $rej = $response->is_success ? 0 : 1; + my $prev = $dev->reject_bad_md5; + + if (!defined($prev) || $prev != $rej) { + debug("dev${\$dev->id}: reject_bad_md5 = $rej"); + $self->state_event('device', $dev->id, { reject_bad_md5 => $rej }); + } + return $self->check_device_done($dev); +} + +# if we fall through to here, then we know that something is not so +# good, so mark it readable which is guaranteed given we even tested +# writeability +sub dev_observed_readonly { + my ($self, $dev) = @_; + + if (!$dev->observed_readable) { + my $event = { observed_state => 'readable' }; + $self->state_event('device', $dev->id, $event); + } + $self->dev_debug($dev, 0); + return $self->check_device_done($dev); +} + +# called when all checks are done for a particular device +sub check_device_done { + my ($self, $dev) = @_; + + $self->still_alive; # Ping parent if needed so we don't time out + # given lots of devices. 
+ delete $self->{refresh_state}->{$dev->id}; + + # if refresh_state is totally empty, we're done + if ((scalar keys %{$self->{refresh_state}}) == 0) { + $self->usage_refresh_done; + } } 1; diff --git a/lib/MogileFS/Worker/Query.pm b/lib/MogileFS/Worker/Query.pm index 02305d53..bde7f987 100644 --- a/lib/MogileFS/Worker/Query.pm +++ b/lib/MogileFS/Worker/Query.pm @@ -5,11 +5,13 @@ use strict; use warnings; use base 'MogileFS::Worker'; -use fields qw(querystarttime reqid); +use fields qw(querystarttime reqid callid); use MogileFS::Util qw(error error_code first weighted_list device_state eurl decode_url_args); use MogileFS::HTTPFile; use MogileFS::Rebalance; +use MogileFS::Config; +use MogileFS::Server; sub new { my ($class, $psock) = @_; @@ -18,6 +20,7 @@ sub new { $self->{querystarttime} = undef; $self->{reqid} = undef; + $self->{callid} = undef; return $self; } @@ -56,7 +59,7 @@ sub work { } my $newread; - my $rv = sysread($psock, $newread, 1024); + my $rv = sysread($psock, $newread, Mgd::UNIX_RCVBUF_SIZE()); if (!$rv) { if (defined $rv) { die "While reading pipe from parent, got EOF. Parent's gone. 
Quitting.\n"; @@ -98,13 +101,14 @@ sub process_line { # fallback to normal command handling if ($line =~ /^(\w+)\s*(.*)/) { - my ($cmd, $args) = ($1, $2); + my ($cmd, $orig_args) = ($1, $2); $cmd = lc($cmd); no strict 'refs'; my $cmd_handler = *{"cmd_$cmd"}{CODE}; + my $args = decode_url_args(\$orig_args); + $self->{callid} = $args->{callid}; if ($cmd_handler) { - my $args = decode_url_args(\$args); local $MogileFS::REQ_altzone = ($args->{zone} && $args->{zone} eq 'alt'); eval { $cmd_handler->($self, $args); @@ -163,7 +167,7 @@ sub check_domain { return $self->err_line("no_domain") unless defined $domain && length $domain; # validate domain - my $dmid = MogileFS::Domain->id_of_name($domain) or + my $dmid = eval { Mgd::domain_factory()->get_by_name($domain)->id } or return $self->err_line("unreg_domain"); return $dmid; @@ -185,14 +189,12 @@ sub cmd_test { sub cmd_clear_cache { my MogileFS::Worker::Query $self = shift; - my $args = shift; - MogileFS::Device->invalidate_cache if $args->{devices} || $args->{all}; - MogileFS::Host->invalidate_cache if $args->{hosts} || $args->{all}; - MogileFS::Class->invalidate_cache if $args->{class} || $args->{all}; - MogileFS::Domain->invalidate_cache if $args->{domain} || $args->{all}; + $self->forget_that_monitor_has_run; + $self->send_to_parent(":refresh_monitor"); + $self->wait_for_monitor; - return $self->ok_line; + return $self->ok_line(@_); } sub cmd_create_open { @@ -200,8 +202,7 @@ sub cmd_create_open { my $args = shift; # has to be filled out for some plugins - $args->{dmid} = $self->check_domain($args) - or return $self->err_line('domain_not_found'); + $args->{dmid} = $self->check_domain($args) or return; # first, pass this to a hook to do any manipulations needed eval {MogileFS::run_global_hook('cmd_create_open', $args)}; @@ -213,6 +214,8 @@ sub cmd_create_open { my $dmid = $args->{dmid}; my $key = $args->{key} || ""; my $multi = $args->{multi_dest} ? 
1 : 0; + my $size = $args->{size} || undef; # Size is optional at create time, + # but used to grep devices if available # optional profiling of stages, if $args->{debug_profile} my @profpoints; # array of [point,hires-starttime] @@ -233,7 +236,7 @@ sub cmd_create_open { my $class = $args->{class} || ""; my $classid = 0; if (length($class)) { - $classid = MogileFS::Class->class_id($dmid, $class) + $classid = eval { Mgd::class_factory()->get_by_name($dmid, $class)->id } or return $self->err_line("unreg_class"); } @@ -245,10 +248,20 @@ sub cmd_create_open { $profstart->("find_deviceid"); - my @devices; + my @devices = Mgd::device_factory()->get_all; + if ($size) { + # We first ignore all the devices with an unknown space free. + @devices = grep { length($_->mb_free) && ($_->mb_free * 1024*1024) > $size } @devices; + + # If we didn't find any, try all the devices with an unknown space free. + # This may happen if mogstored isn't running. + if (!@devices) { + @devices = grep { !length($_->mb_free) } Mgd::device_factory()->get_all; + } + } - unless (MogileFS::run_global_hook('cmd_create_open_order_devices', [MogileFS::Device->devices], \@devices)) { - @devices = sort_devs_by_freespace(MogileFS::Device->devices); + unless (MogileFS::run_global_hook('cmd_create_open_order_devices', [ @devices ], \@devices)) { + @devices = sort_devs_by_freespace(@devices); } # find suitable device(s) to put this file on. @@ -281,12 +294,22 @@ sub cmd_create_open { } # make sure directories exist for client to be able to PUT into + my %dir_done; + $profstart->("vivify_dir_on_all_devs"); + + my $t0 = Time::HiRes::time(); foreach my $dev (@dests) { - $profstart->("vivify_dir_on_dev" . 
$dev->id); my $dfid = MogileFS::DevFID->new($dev, $fidid); - $dfid->vivify_directories; + $dfid->vivify_directories(sub { + $dir_done{$dfid->devid} = Time::HiRes::time() - $t0; + }); } + # don't start the event loop if results are all cached + if (scalar keys %dir_done != scalar @dests) { + Danga::Socket->SetPostLoopCallback(sub { scalar keys %dir_done != scalar @dests }); + Danga::Socket->EventLoop; + } $profstart->("end"); # common reply variables @@ -304,6 +327,11 @@ sub cmd_create_open { sprintf("%0.03f", $profpoints[$i+1]->[1] - $profpoints[$i]->[1]); } + while (my ($devid, $time) = each %dir_done) { + my $ptnum = ++$res->{profpoints}; + $res->{"prof_${ptnum}_name"} = "vivify_dir_on_dev$devid"; + $res->{"prof_${ptnum}_time"} = sprintf("%0.03f", $time); + } } # add path info @@ -329,7 +357,6 @@ sub sort_devs_by_freespace { } sort { $b->percent_free <=> $a->percent_free; } grep { - $_->exists && $_->should_get_new_files; } @_; @@ -339,13 +366,18 @@ sub sort_devs_by_freespace { return @list; } +sub valid_key { + my ($key) = @_; + + return defined($key) && length($key); +} + sub cmd_create_close { my MogileFS::Worker::Query $self = shift; my $args = shift; # has to be filled out for some plugins - $args->{dmid} = $self->check_domain($args) - or return $self->err_line('domain_not_found'); + $args->{dmid} = $self->check_domain($args) or return; # call out to a hook that might modify the arguments for us MogileFS::run_global_hook('cmd_create_close', $args); @@ -356,6 +388,12 @@ sub cmd_create_close { my $fidid = $args->{fid} or return $self->err_line("no_fid"); my $devid = $args->{devid} or return $self->err_line("no_devid"); my $path = $args->{path} or return $self->err_line("no_path"); + my $checksum = $args->{checksum}; + + if ($checksum) { + $checksum = eval { MogileFS::Checksum->from_string($fidid, $checksum) }; + return $self->err_line("invalid_checksum_format") if $@; + } my $fid = MogileFS::FID->new($fidid); my $dfid = MogileFS::DevFID->new($devid, $fid); @@ 
-368,30 +406,32 @@ sub cmd_create_close { # find the temp file we're closing and making real. If another worker # already has it, bail out---the client closed it twice. + # this is racy, but the only expected use case is a client retrying. + # should still be fixed better once more scalable locking is available. my $trow = $sto->delete_and_return_tempfile_row($fidid) or return $self->err_line("no_temp_file"); - # if a temp file is closed without a provided-key, that means to - # delete it. - unless (defined $key && length($key)) { + # Protect against leaving orphaned uploads. + my $failed = sub { $dfid->add_to_db; $fid->delete; - return $self->ok_line; - } + }; - # see if we have a fid for this key already - my $old_fid = MogileFS::FID->new_from_dmid_and_key($dmid, $key); - if ($old_fid) { - # Fail if a file already exists for this fid. Should never - # happen, as it should not be possible to close a file twice. - return $self->err_line("fid_exists") - unless $old_fid->{fidid} != $fidid; + unless ($trow->{devids} =~ m/\b$devid\b/) { + $failed->(); + return $self->err_line("invalid_destdev", "File uploaded to invalid dest $devid. Valid devices were: " . $trow->{devids}); + } - $old_fid->delete; + # if a temp file is closed without a provided-key, that means to + # delete it. + unless (valid_key($key)) { + $failed->(); + return $self->ok_line; } # get size of file and verify that it matches what we were given, if anything - my $size = MogileFS::HTTPFile->at($path)->size; + my $httpfile = MogileFS::HTTPFile->at($path); + my $size = $httpfile->size; # size check is optional? Needs to support zero byte files. $args->{size} = -1 unless $args->{size}; @@ -399,57 +439,88 @@ sub cmd_create_close { # storage node is unreachable or the file is missing my $type = defined $size ? 
"missing" : "cantreach"; my $lasterr = MogileFS::Util::last_error(); + $failed->(); return $self->err_line("size_verify_error", "Expected: $args->{size}; actual: 0 ($type); path: $path; error: $lasterr") } - return $self->err_line("size_mismatch", "Expected: $args->{size}; actual: $size; path: $path") - if $args->{size} > -1 && ($args->{size} != $size); + if ($args->{size} > -1 && ($args->{size} != $size)) { + $failed->(); + return $self->err_line("size_mismatch", "Expected: $args->{size}; actual: $size; path: $path") + } + + # checksum validation is optional as it can be very expensive + # However, we /always/ verify it if the client wants us to, even + # if the class does not enforce or store it. + if ($checksum && $args->{checksumverify}) { + my $alg = $checksum->hashname; + my $actual = $httpfile->digest($alg, sub { $self->still_alive }); + if ($actual ne $checksum->{checksum}) { + $failed->(); + $actual = "$alg:" . unpack("H*", $actual); + return $self->err_line("checksum_mismatch", + "Expected: $checksum; actual: $actual; path: $path"); + } + } + + # see if we have a fid for this key already + my $old_fid = MogileFS::FID->new_from_dmid_and_key($dmid, $key); + if ($old_fid) { + # Fail if a file already exists for this fid. Should never + # happen, as it should not be possible to close a file twice. + return $self->err_line("fid_exists") + unless $old_fid->{fidid} != $fidid; + + $old_fid->delete; + } # TODO: check for EIO? # insert file_on row $dfid->add_to_db; + $checksum->maybe_save($dmid, $trow->{classid}) if $checksum; + $sto->replace_into_file( fidid => $fidid, dmid => $dmid, key => $key, length => $size, classid => $trow->{classid}, + devcount => 1, ); # mark it as needing replicating: - $fid->enqueue_for_replication(from_device => $devid); - - if ($fid->update_devcount) { - # call the hook - if this fails, we need to back the file out - my $rv = MogileFS::run_global_hook('file_stored', $args); - if (defined $rv && ! 
$rv) { # undef = no hooks, 1 = success, 0 = failure - $fid->delete; - return $self->err_line("plugin_aborted"); - } + $fid->enqueue_for_replication(); - # all went well - return $self->ok_line; - } else { - # FIXME: handle this better - return $self->err_line("db_error"); + # call the hook - if this fails, we need to back the file out + my $rv = MogileFS::run_global_hook('file_stored', $args); + if (defined $rv && ! $rv) { # undef = no hooks, 1 = success, 0 = failure + $fid->delete; + return $self->err_line("plugin_aborted"); } + + # all went well, we would've hit condthrow on DB errors + return $self->ok_line; } sub cmd_updateclass { my MogileFS::Worker::Query $self = shift; my $args = shift; - $args->{dmid} = $self->check_domain($args) - or return $self->err_line('domain_not_found'); + $args->{dmid} = $self->check_domain($args) or return; + + # call out to a hook that might modify the arguments for us, abort if it tells us to + my $rv = MogileFS::run_global_hook('cmd_updateclass', $args); + return $self->err_line('plugin_aborted') if defined $rv && ! 
$rv; my $dmid = $args->{dmid}; - my $key = $args->{key} or return $self->err_line("no_key"); + my $key = $args->{key}; + valid_key($key) or return $self->err_line("no_key"); my $class = $args->{class} or return $self->err_line("no_class"); - my $classid = MogileFS::Class->class_id($dmid, $class) + my $classobj = Mgd::class_factory()->get_by_name($dmid, $class) or return $self->err_line('class_not_found'); + my $classid = $classobj->id; my $fid = MogileFS::FID->new_from_dmid_and_key($dmid, $key) or return $self->err_line('invalid_key'); @@ -470,8 +541,7 @@ sub cmd_delete { my $args = shift; # validate domain for plugins - $args->{dmid} = $self->check_domain($args) - or return $self->err_line('domain_not_found'); + $args->{dmid} = $self->check_domain($args) or return; # now invoke the plugin, abort if it tells us to my $rv = MogileFS::run_global_hook('cmd_delete', $args); @@ -480,7 +550,9 @@ sub cmd_delete { # validate parameters my $dmid = $args->{dmid}; - my $key = $args->{key} or return $self->err_line("no_key"); + my $key = $args->{key}; + + valid_key($key) or return $self->err_line("no_key"); # is this fid still owned by this key? my $fid = MogileFS::FID->new_from_dmid_and_key($dmid, $key) @@ -491,19 +563,144 @@ sub cmd_delete { return $self->ok_line; } +# Takes either domain/dkey or fid and tries to return as much as possible. +sub cmd_file_debug { + my MogileFS::Worker::Query $self = shift; + my $args = shift; + # Talk to the master since this is "debug mode" + my $sto = Mgd::get_store(); + my $ret = {}; + + # If a FID is provided, just use that. + my $fid; + my $fidid; + if ($args->{fid}) { + $fidid = $args->{fid}+0; + # It's not fatal if we don't find the row here. + $fid = $sto->file_row_from_fidid($args->{fid}+0); + } else { + # If not, require dmid/dkey and pick up the fid from there. 
+ $args->{dmid} = $self->check_domain($args) or return; + return $self->err_line("no_key") unless valid_key($args->{key}); + + # now invoke the plugin, abort if it tells us to + my $rv = MogileFS::run_global_hook('cmd_file_debug', $args); + return $self->err_line('plugin_aborted') + if defined $rv && ! $rv; + + $fid = $sto->file_row_from_dmid_key($args->{dmid}, $args->{key}); + return $self->err_line("unknown_key") unless $fid; + $fidid = $fid->{fid}; + } + + if ($fid) { + $fid->{domain} = Mgd::domain_factory()->get_by_id($fid->{dmid})->name; + $fid->{class} = Mgd::class_factory()->get_by_id($fid->{dmid}, + $fid->{classid})->name; + } + + # Fetch all of the queue data. + my $tfile = $sto->tempfile_row_from_fid($fidid); + my $repl = $sto->find_fid_from_file_to_replicate($fidid); + my $del = $sto->find_fid_from_file_to_delete2($fidid); + my $reb = $sto->find_fid_from_file_to_queue($fidid, REBAL_QUEUE); + my $fsck = $sto->find_fid_from_file_to_queue($fidid, FSCK_QUEUE); + + # Fetch file_on rows, and turn into paths. + my @devids = $sto->fid_devids($fidid); + for my $devid (@devids) { + # Won't matter if we can't make the path (dev is dead/deleted/etc) + eval { + my $dfid = MogileFS::DevFID->new($devid, $fidid); + my $path = $dfid->get_url; + $ret->{'devpath_' . $devid} = $path; + }; + } + $ret->{devids} = join(',', @devids) if @devids; + + # Always look for a checksum + my $checksum = Mgd::get_store()->get_checksum($fidid); + if ($checksum) { + $checksum = MogileFS::Checksum->new($checksum); + $ret->{checksum} = $checksum->info; + } else { + $ret->{checksum} = 'NONE'; + } + + # Return file row (if found) and all other data. + my %toret = (fid => $fid, tempfile => $tfile, replqueue => $repl, + delqueue => $del, rebqueue => $reb, fsckqueue => $fsck); + while (my ($key, $hash) = each %toret) { + while (my ($name, $val) = each %$hash) { + $ret->{$key . '_' . 
$name} = $val; + } + } + + return $self->err_line("unknown_fid") unless keys %$ret; + return $self->ok_line($ret); +} + +sub cmd_file_info { + my MogileFS::Worker::Query $self = shift; + my $args = shift; + + # validate domain for plugins + $args->{dmid} = $self->check_domain($args) or return; + + # now invoke the plugin, abort if it tells us to + my $rv = MogileFS::run_global_hook('cmd_file_info', $args); + return $self->err_line('plugin_aborted') + if defined $rv && ! $rv; + + # validate parameters + my $dmid = $args->{dmid}; + my $key = $args->{key}; + + valid_key($key) or return $self->err_line("no_key"); + + my $fid; + Mgd::get_store()->slaves_ok(sub { + $fid = MogileFS::FID->new_from_dmid_and_key($dmid, $key); + }); + $fid or return $self->err_line("unknown_key"); + + my $ret = {}; + $ret->{fid} = $fid->id; + $ret->{domain} = Mgd::domain_factory()->get_by_id($fid->dmid)->name; + my $class = Mgd::class_factory()->get_by_id($fid->dmid, $fid->classid); + $ret->{class} = $class->name; + if ($class->{hashtype}) { + my $checksum = Mgd::get_store()->get_checksum($fid->id); + if ($checksum) { + $checksum = MogileFS::Checksum->new($checksum); + $ret->{checksum} = $checksum->info; + } else { + $ret->{checksum} = "MISSING"; + } + } + $ret->{key} = $key; + $ret->{'length'} = $fid->length; + $ret->{devcount} = $fid->devcount; + # Only if requested, also return the raw devids. + # Caller should use get_paths if they intend to fetch the file. 
+ if ($args->{devices}) { + $ret->{devids} = join(',', $fid->devids); + } + + return $self->ok_line($ret); +} + sub cmd_list_fids { my MogileFS::Worker::Query $self = shift; my $args = shift; # validate parameters my $fromfid = ($args->{from} || 0)+0; - my $tofid = ($args->{to} || 0)+0; - $tofid ||= ($fromfid + 100); - $tofid = ($fromfid + 100) - if $tofid > $fromfid + 100 || - $tofid < $fromfid; + my $count = ($args->{to} || 0)+0; + $count ||= 100; + $count = 500 if $count > 500 || $count < 0; - my $rows = Mgd::get_store()->file_row_from_fidid_range($fromfid, $tofid); + my $rows = Mgd::get_store()->file_row_from_fidid_range($fromfid, $count); return $self->err_line('failure') unless $rows; return $self->ok_line({ fid_count => 0 }) unless @$rows; @@ -517,8 +714,10 @@ sub cmd_list_fids { $ct++; my $fid = $r->{fid}; $ret->{"fid_${ct}_fid"} = $fid; - $ret->{"fid_${ct}_domain"} = ($domains{$r->{dmid}} ||= MogileFS::Domain->name_of_id($r->{dmid})); - $ret->{"fid_${ct}_class"} = ($classes{$r->{dmid}}{$r->{classid}} ||= MogileFS::Class->class_name($r->{dmid}, $r->{classid})); + $ret->{"fid_${ct}_domain"} = ($domains{$r->{dmid}} ||= + Mgd::domain_factory()->get_by_id($r->{dmid})->name); + $ret->{"fid_${ct}_class"} = ($classes{$r->{dmid}}{$r->{classid}} ||= + Mgd::class_factory()->get_by_id($r->{dmid}, $r->{classid})->name); $ret->{"fid_${ct}_key"} = $r->{dkey}; $ret->{"fid_${ct}_length"} = $r->{length}; $ret->{"fid_${ct}_devcount"} = $r->{devcount}; @@ -532,21 +731,13 @@ sub cmd_list_keys { my $args = shift; # validate parameters - my $dmid = $self->check_domain($args) - or return $self->err_line('domain_not_found'); + my $dmid = $self->check_domain($args) or return; my ($prefix, $after, $limit) = ($args->{prefix}, $args->{after}, $args->{limit}); if (defined $prefix and $prefix ne '') { # now validate that after matches prefix return $self->err_line('after_mismatch') if $after && $after !~ /^$prefix/; - - # verify there are no % or \ characters - return 
$self->err_line('invalid_chars') - if $prefix =~ /[%\\]/; - - # escape underscores - $prefix =~ s/_/\\_/g; } $limit ||= 1000; @@ -574,10 +765,11 @@ sub cmd_rename { my $args = shift; # validate parameters - my $dmid = $self->check_domain($args) - or return $self->err_line('domain_not_found'); + my $dmid = $self->check_domain($args) or return; my ($fkey, $tkey) = ($args->{from_key}, $args->{to_key}); - return $self->err_line("no_key") unless $fkey && $tkey; + unless (valid_key($fkey) && valid_key($tkey)) { + return $self->err_line("no_key"); + } my $fid = MogileFS::FID->new_from_dmid_and_key($dmid, $fkey) or return $self->err_line("unknown_key"); @@ -592,18 +784,15 @@ sub cmd_get_hosts { my MogileFS::Worker::Query $self = shift; my $args = shift; - MogileFS::Host->invalidate_cache; - my $ret = { hosts => 0 }; - foreach my $host (MogileFS::Host->hosts) { + for my $host (Mgd::host_factory()->get_all) { next if defined $args->{hostid} && $host->id != $args->{hostid}; my $n = ++$ret->{hosts}; - foreach my $key (qw(hostid status hostname hostip - http_port http_get_port - altip altmask)) - { + my $fields = $host->fields(qw(hostid status hostname hostip http_port + http_get_port altip altmask)); + while (my ($key, $val) = each %$fields) { # must be regular data so copy it in - $ret->{"host${n}_$key"} = $host->field($key); + $ret->{"host${n}_$key"} = $val; } } @@ -615,11 +804,11 @@ sub cmd_get_devices { my $args = shift; my $ret = { devices => 0 }; - foreach my $dev (MogileFS::Device->devices) { + for my $dev (Mgd::device_factory()->get_all) { next if defined $args->{devid} && $dev->id != $args->{devid}; my $n = ++$ret->{devices}; - my $sum = $dev->overview_hashref; + my $sum = $dev->fields; while (my ($key, $val) = each %$sum) { $ret->{"dev${n}_$key"} = $val; } @@ -639,25 +828,21 @@ sub cmd_create_device { my $devid = $args->{devid}; return $self->err_line("invalid_devid") unless $devid && $devid =~ /^\d+$/; - my ($host, $hostid); + my $hostid; - 
MogileFS::Host->check_cache; + my $sto = Mgd::get_store(); if ($args->{hostid} && $args->{hostid} =~ /^\d+$/) { - $hostid = $args->{hostid}; - $host = MogileFS::Host->of_hostid($hostid); - return $self->err_line("unknown_hostid") unless $host && $host->exists; + $hostid = $sto->get_hostid_by_id($args->{hostid}); + return $self->err_line("unknown_hostid") unless $hostid; } elsif (my $hname = $args->{hostname}) { - $host = MogileFS::Host->of_hostname($hname); - return $self->err_line("unknown_host") unless $host; - $hostid = $host->id; + $hostid = $sto->get_hostid_by_name($hname); + return $self->err_line("unknown_host") unless $hostid; } else { return $self->err_line("bad_args", "No hostid/hostname parameter"); } - if (eval { MogileFS::Device->create(devid => $devid, - hostid => $hostid, - status => $status) }) { - return $self->ok_line; + if (eval { $sto->create_device($devid, $hostid, $status) }) { + return $self->cmd_clear_cache; } my $errc = error_code($@); @@ -672,9 +857,7 @@ sub cmd_create_domain { my $domain = $args->{domain} or return $self->err_line('no_domain'); - # TODO: auth/permissions? 
- - my $dom = eval { MogileFS::Domain->create($domain); }; + my $dom = eval { Mgd::get_store()->create_domain($domain); }; if ($@) { if (error_code($@) eq "dup") { return $self->err_line('domain_exists'); @@ -682,7 +865,7 @@ sub cmd_create_domain { return $self->err_line('failure', "$@"); } - return $self->ok_line({ domain => $domain }); + return $self->cmd_clear_cache({ domain => $domain }); } sub cmd_delete_domain { @@ -692,15 +875,17 @@ sub cmd_delete_domain { my $domain = $args->{domain} or return $self->err_line('no_domain'); - my $dom = MogileFS::Domain->of_namespace($domain) or + my $sto = Mgd::get_store(); + my $dmid = $sto->get_domainid_by_name($domain) or return $self->err_line('domain_not_found'); - if (eval { $dom->delete }) { - return $self->ok_line({ domain => $domain }); + if (eval { $sto->delete_domain($dmid) }) { + return $self->cmd_clear_cache({ domain => $domain }); } my $err = error_code($@); return $self->err_line('domain_has_files') if $err eq "has_files"; + return $self->err_line('domain_has_classes') if $err eq "has_classes"; return $self->err_line("failure"); } @@ -725,23 +910,46 @@ sub cmd_create_class { return $self->err_line('invalid_replpolicy', $@) if $@; } - my $dom = MogileFS::Domain->of_namespace($domain) or + my $hashtype = $args->{hashtype}; + if ($hashtype && $hashtype ne 'NONE') { + my $tmp = $MogileFS::Checksum::NAME2TYPE{$hashtype}; + return $self->err_line('invalid_hashtype') unless $tmp; + $hashtype = $tmp; + } + + my $sto = Mgd::get_store(); + my $dmid = $sto->get_domainid_by_name($domain) or return $self->err_line('domain_not_found'); - my $cls = $dom->class($class); + my $clsid = $sto->get_classid_by_name($dmid, $class); + if (!defined $clsid && $args->{update} && $class eq 'default') { + $args->{update} = 0; + } if ($args->{update}) { - return $self->err_line('class_not_found') if ! $cls; - $cls->set_name($class); + return $self->err_line('class_not_found') if ! 
defined $clsid; + $sto->update_class_name(dmid => $dmid, classid => $clsid, + classname => $class); } else { - return $self->err_line('class_exists') if $cls; - $cls = $dom->create_class($class); + $clsid = eval { $sto->create_class($dmid, $class); }; + if ($@) { + if (error_code($@) eq "dup") { + return $self->err_line('class_exists'); + } + return $self->err_line('failure', "$@"); + } } - $cls->set_mindevcount($mindevcount); + $sto->update_class_mindevcount(dmid => $dmid, classid => $clsid, + mindevcount => $mindevcount); # don't erase an existing replpolicy if we're not setting a new one. - $cls->set_replpolicy($replpolicy) if $replpolicy; + $sto->update_class_replpolicy(dmid => $dmid, classid => $clsid, + replpolicy => $replpolicy) if $replpolicy; + if ($hashtype) { + $sto->update_class_hashtype(dmid => $dmid, classid => $clsid, + hashtype => $hashtype eq 'NONE' ? undef : $hashtype); + } # return success - return $self->ok_line({ class => $class, mindevcount => $mindevcount, domain => $domain }); + return $self->cmd_clear_cache({ class => $class, mindevcount => $mindevcount, domain => $domain }); } sub cmd_update_class { @@ -761,13 +969,16 @@ sub cmd_delete_class { my $class = $args->{class}; return $self->err_line('no_class') unless length $domain; - my $dom = MogileFS::Domain->of_namespace($domain) or + return $self->err_line('nodel_default_class') if $class eq 'default'; + + my $sto = Mgd::get_store(); + my $dmid = $sto->get_domainid_by_name($domain) or return $self->err_line('domain_not_found'); - my $cls = $dom->class($class) or - return $self->err_line('class_not_found'); + my $clsid = $sto->get_classid_by_name($dmid, $class); + return $self->err_line('class_not_found') unless defined $clsid; - if (eval { $cls->delete }) { - return $self->ok_line({ domain => $domain, class => $class }); + if (eval { Mgd::get_store()->delete_class($dmid, $clsid) }) { + return $self->cmd_clear_cache({ domain => $domain, class => $class }); } my $errc = error_code($@); @@ 
-782,14 +993,15 @@ sub cmd_create_host { my $hostname = $args->{host} or return $self->err_line('no_host'); - my $host = MogileFS::Host->of_hostname($hostname); + my $sto = Mgd::get_store(); + my $hostid = $sto->get_hostid_by_name($hostname); # if we're creating a new host, require ip/port, and default to # host being down if client didn't specify if ($args->{update}) { - return $self->err_line('host_not_found') unless $host; + return $self->err_line('host_not_found') unless $hostid; } else { - return $self->err_line('host_exists') if $host; + return $self->err_line('host_exists') if $hostid; return $self->err_line('no_ip') unless $args->{ip}; return $self->err_line('no_port') unless $args->{port}; $args->{status} ||= 'down'; @@ -797,26 +1009,23 @@ sub cmd_create_host { if ($args->{status}) { return $self->err_line('unknown_state') - unless MogileFS::Host->valid_initial_state($args->{status}); + unless MogileFS::Host->valid_state($args->{status}); } # arguments all good, let's do it. - $host ||= MogileFS::Host->create($hostname, $args->{ip}); - my %setter = ( - status => "set_status", - ip => "set_ip", - port => "set_http_port", - getport => "set_http_get_port", - altip => "set_alt_ip", - altmask => "set_alt_mask", - ); - while (my ($f, $meth) = each %setter) { - $host->$meth($args->{$f}) if exists $args->{$f}; - } + $hostid ||= $sto->create_host($hostname, $args->{ip}); + + # Protocol mismatch data fixup. 
+ $args->{hostip} = delete $args->{ip} if exists $args->{ip}; + $args->{http_port} = delete $args->{port} if exists $args->{port}; + $args->{http_get_port} = delete $args->{getport} if exists $args->{getport}; + my @toupdate = grep { exists $args->{$_} } qw(status hostip http_port + http_get_port altip altmask); + $sto->update_host($hostid, { map { $_ => $args->{$_} } @toupdate }); # return success - return $self->ok_line($host->overview_hashref); + return $self->cmd_clear_cache({ hostid => $hostid, hostname => $hostname }); } sub cmd_update_host { @@ -831,30 +1040,28 @@ sub cmd_delete_host { my MogileFS::Worker::Query $self = shift; my $args = shift; - my $host = MogileFS::Host->of_hostname($args->{host}) + my $sto = Mgd::get_store(); + my $hostid = $sto->get_hostid_by_name($args->{host}) or return $self->err_line('unknown_host'); - my $hostid = $host->id; - - foreach my $dev (MogileFS::Device->devices) { + # TODO: $sto->delete_host should have a "has_devices" test internally + for my $dev ($sto->get_all_devices) { return $self->err_line('host_not_empty') - if $dev->hostid == $hostid; + if $dev->{hostid} == $hostid; } - $host->delete; + $sto->delete_host($hostid); - return $self->ok_line; + return $self->cmd_clear_cache; } sub cmd_get_domains { my MogileFS::Worker::Query $self = shift; my $args = shift; - MogileFS::Domain->invalidate_cache; - my $ret = {}; my $dm_n = 0; - foreach my $dom (MogileFS::Domain->domains) { + for my $dom (Mgd::domain_factory()->get_all) { $dm_n++; $ret->{"domain${dm_n}"} = $dom->name; my $cl_n = 0; @@ -864,6 +1071,7 @@ sub cmd_get_domains { $ret->{"domain${dm_n}class${cl_n}mindevcount"} = $cl->mindevcount; $ret->{"domain${dm_n}class${cl_n}replpolicy"} = $cl->repl_policy_string; + $ret->{"domain${dm_n}class${cl_n}hashtype"} = $cl->hashtype_string; } $ret->{"domain${dm_n}classes"} = $cl_n; } @@ -877,17 +1085,17 @@ sub cmd_get_paths { my $args = shift; # memcache mappings are as follows: - # mogfid:: -> fidid (and TODO: invalidate this when 
key is replaced) - # mogdevids: -> \@devids (and TODO: invalidate when the replication or deletion is run!) + # mogfid:: -> fidid + # mogdevids: -> \@devids (and TODO: invalidate when deletion is run!) # if you specify 'noverify', that means a correct answer isn't needed and memcache can # be used. - my $use_memc = $args->{noverify}; - my $memc = $use_memc ? MogileFS::Config->memcache_client : undef; + my $memc = MogileFS::Config->memcache_client; + my $get_from_memc = $memc && $args->{noverify}; + my $memcache_ttl = MogileFS::Config->server_setting_cached("memcache_ttl") || 3600; # validate domain for plugins - $args->{dmid} = $self->check_domain($args) - or return $self->err_line('domain_not_found'); + $args->{dmid} = $self->check_domain($args) or return; # now invoke the plugin, abort if it tells us to my $rv = MogileFS::run_global_hook('cmd_get_paths', $args); @@ -896,7 +1104,9 @@ sub cmd_get_paths { # validate parameters my $dmid = $args->{dmid}; - my $key = $args->{key} or return $self->err_line("no_key"); + my $key = $args->{key}; + + valid_key($key) or return $self->err_line("no_key"); # We default to returning two possible paths. # but the client may ask for more if they want. @@ -907,7 +1117,7 @@ sub cmd_get_paths { my $fid; my $need_fid_in_memcache = 0; my $mogfid_memkey = "mogfid:$args->{dmid}:$key"; - if ($memc) { + if ($get_from_memc) { if (my $fidid = $memc->get($mogfid_memkey)) { $fid = MogileFS::FID->new($fidid); } else { @@ -922,9 +1132,9 @@ sub cmd_get_paths { } # add to memcache, if needed. for an hour. - $memc->add($mogfid_memkey, $fid->id, 3600) if $need_fid_in_memcache; + $memc->set($mogfid_memkey, $fid->id, $memcache_ttl ) if $need_fid_in_memcache || ($memc && !$get_from_memc); - my $dmap = MogileFS::Device->map; + my $dmap = Mgd::device_factory()->map_by_id; my $ret = { paths => 0, @@ -934,7 +1144,7 @@ sub cmd_get_paths { my @fid_devids; my $need_devids_in_memcache = 0; my $devid_memkey = "mogdevids:" . 
$fid->id; - if ($memc) { + if ($get_from_memc) { if (my $list = $memc->get($devid_memkey)) { @fid_devids = @$list; } else { @@ -945,7 +1155,7 @@ sub cmd_get_paths { Mgd::get_store()->slaves_ok(sub { @fid_devids = $fid->devids; }); - $memc->add($devid_memkey, \@fid_devids, 3600) if $need_devids_in_memcache; + $memc->set($devid_memkey, \@fid_devids, $memcache_ttl ) if $need_devids_in_memcache || ($memc && !$get_from_memc); } my @devices = map { $dmap->{$_} } @fid_devids; @@ -958,18 +1168,18 @@ sub cmd_get_paths { # keep one partially-bogus path around just in case we have nothing else to send. my $backup_path; + # files on devices set for drain may disappear soon. + my @drain_paths; + # construct result paths foreach my $dev (@sorted_devs) { - next unless $dev && ($dev->can_read_from); + next unless $dev && $dev->host; - my $host = $dev->host; - next unless $dev && $host; my $dfid = MogileFS::DevFID->new($dev, $fid); my $path = $dfid->get_url; - my $currently_down = - $host->observed_unreachable || $dev->observed_unreachable; + my $currently_up = $dev->should_read_from; - if ($currently_down) { + if (! $currently_up) { $backup_path = $path; next; } @@ -980,11 +1190,26 @@ sub cmd_get_paths { $args->{noverify} || $dfid->size_matches; + if ($dev->dstate->should_drain) { + push @drain_paths, $path; + next; + } + my $n = ++$ret->{paths}; $ret->{"path$n"} = $path; last if $n == $pathcount; # one verified, one likely seems enough for now. time will tell. } + # deprioritize devices set for drain, they could disappear soon... + # Clients /should/ try to use lower-numbered paths first to avoid this. + if ($ret->{paths} < $pathcount && @drain_paths) { + foreach my $path (@drain_paths) { + my $n = ++$ret->{paths}; + $ret->{"path$n"} = $path; + last if $n == $pathcount; + } + } + # use our backup path if all else fails if ($backup_path && ! 
$ret->{paths}) { $ret->{paths} = 1; @@ -1048,8 +1273,7 @@ sub cmd_edit_file { my $memc = MogileFS::Config->memcache_client; # validate domain for plugins - $args->{dmid} = $self->check_domain($args) - or return $self->err_line('domain_not_found'); + $args->{dmid} = $self->check_domain($args) or return; # now invoke the plugin, abort if it tells us to my $rv = MogileFS::run_global_hook('cmd_get_paths', $args); @@ -1058,7 +1282,9 @@ sub cmd_edit_file { # validate parameters my $dmid = $args->{dmid}; - my $key = $args->{key} or return $self->err_line("no_key"); + my $key = $args->{key}; + + valid_key($key) or return $self->err_line("no_key"); # get DB handle my $fid; @@ -1079,7 +1305,7 @@ sub cmd_edit_file { # add to memcache, if needed. for an hour. $memc->add($mogfid_memkey, $fid->id, 3600) if $need_fid_in_memcache; - my $dmap = MogileFS::Device->map; + my $dmap = Mgd::device_factory()->map_by_id; my @devices_with_weights; @@ -1124,24 +1350,20 @@ sub cmd_edit_file { @list = grep { my $devid = $_; my $dev = $dmap->{$devid}; - my $host = $dev ? 
$dev->host : undef; - $dev - && $host - && $dev->can_read_from - && !($host->observed_unreachable || $dev->observed_unreachable); + $dev && $dev->should_read_from; } @list; # Take first remaining device from list my $devid = $list[0]; - my $class = MogileFS::Class->of_fid($fid); + my $classid = $fid->classid; my $newfid = eval { Mgd::get_store()->register_tempfile( fid => undef, # undef => let the store pick a fid dmid => $dmid, key => $key, # This tempfile will ultimately become this key - classid => $class->classid, + classid => $classid, devids => $devid, ); }; @@ -1169,7 +1391,7 @@ sub cmd_edit_file { $ret->{newpath} = $paths[1]; $ret->{fid} = $newfid; $ret->{devid} = $devid; - $ret->{class} = $class->classid; + $ret->{class} = $classid; return $self->ok_line($ret); } @@ -1182,12 +1404,14 @@ sub cmd_set_weight { return $self->err_line('bad_params') unless $hostname && $devid && $weight >= 0; - my $dev = MogileFS::Device->from_devid_and_hostname($devid, $hostname) - or return $self->err_line('host_mismatch'); + my $dev = Mgd::device_factory()->get_by_id($devid); + return $self->err_line('no_device') unless $dev; + return $self->err_line('host_mismatch') + unless $dev->host->hostname eq $hostname; - $dev->set_weight($weight); + Mgd::get_store()->set_device_weight($dev->id, $weight); - return $self->ok_line; + return $self->cmd_clear_cache; } sub cmd_set_state { @@ -1201,15 +1425,17 @@ sub cmd_set_state { return $self->err_line('bad_params') unless $hostname && $devid && $dstate; - my $dev = MogileFS::Device->from_devid_and_hostname($devid, $hostname) - or return $self->err_line('host_mismatch'); + my $dev = Mgd::device_factory()->get_by_id($devid); + return $self->err_line('no_device') unless $dev; + return $self->err_line('host_mismatch') + unless $dev->host->hostname eq $hostname; # make sure the destination state isn't too high return $self->err_line('state_too_high') unless $dev->can_change_to_state($state); - $dev->set_state($state); - return $self->ok_line; 
+ Mgd::get_store()->set_device_state($dev->id, $state); + return $self->cmd_clear_cache; } sub cmd_noop { @@ -1225,31 +1451,6 @@ sub cmd_replicate_now { return $self->ok_line({ count => int($rv) }); } -sub cmd_checker { - my MogileFS::Worker::Query $self = shift; - my $args = shift; - - my $new_setting; - if ($args->{disable}) { - $new_setting = 'off'; - } elsif ($args->{level}) { - # they want to turn it on or change the level, so let's ensure they - # specified a valid level - if (MogileFS::Worker::Checker::is_valid_level($args->{level})) { - $new_setting = $args->{level}; - } else { - return $self->err_line('invalid_checker_level'); - } - } - - if (defined $new_setting) { - MogileFS::Config->set_server_setting('fsck_enable', $new_setting); - return $self->ok_line; - } - - $self->err_line('failure'); -} - sub cmd_set_server_setting { my MogileFS::Worker::Query $self = shift; my $args = shift; @@ -1264,6 +1465,14 @@ sub cmd_set_server_setting { return $self->err_line("invalid_format", $@) if $@; MogileFS::Config->set_server_setting($key, $cleanval); + + # GROSS HACK: slave settings are managed directly by MogileFS::Client, but + # I need to add a version key, so we check and inject that code here. + # FIXME: Move this when slave keys are managed by query worker commands! 
+ if ($key =~ /^slave_/) { + Mgd::get_store()->incr_server_setting('slave_version', 1); + } + return $self->ok_line; } @@ -1312,8 +1521,9 @@ sub cmd_fsck_start { my $intss = sub { MogileFS::Config->server_setting($_[0]) || 0 }; my $checked_fid = $intss->("fsck_highest_fid_checked"); my $final_fid = $intss->("fsck_fid_at_end"); - if ($checked_fid && $final_fid && $checked_fid >= $final_fid) { - $self->_do_fsck_reset or return $self->err_line; + if (($checked_fid && $final_fid && $checked_fid >= $final_fid) || + (!$final_fid && !$checked_fid)) { + $self->_do_fsck_reset or return $self->err_line("db"); } # set params for stats: @@ -1349,27 +1559,34 @@ sub cmd_fsck_reset { $sto->set_server_setting("fsck_highest_fid_checked", ($args->{startpos} ? $args->{startpos} : "0")); - $self->_do_fsck_reset or return $self->err_line; + $self->_do_fsck_reset or return $self->err_line("db"); return $self->ok_line; } sub _do_fsck_reset { my MogileFS::Worker::Query $self = shift; - my $sto = Mgd::get_store(); - $sto->set_server_setting("fsck_start_time", undef); - $sto->set_server_setting("fsck_stop_time", undef); - $sto->set_server_setting("fsck_fids_checked", 0); - $sto->set_server_setting("fsck_fid_at_end", $sto->max_fidid); - - # clear existing event counts summaries. - my $ss = $sto->server_settings; - foreach my $k (keys %$ss) { - next unless $k =~ /^fsck_sum_evcount_/; - $sto->set_server_setting($k, undef); + eval { + my $sto = Mgd::get_store(); + $sto->set_server_setting("fsck_start_time", undef); + $sto->set_server_setting("fsck_stop_time", undef); + $sto->set_server_setting("fsck_fids_checked", 0); + $sto->set_server_setting("fsck_fid_at_end", $sto->max_fidid); + + # clear existing event counts summaries. 
+ my $ss = $sto->server_settings; + foreach my $k (keys %$ss) { + next unless $k =~ /^fsck_sum_evcount_/; + $sto->set_server_setting($k, undef); + } + my $logid = $sto->max_fsck_logid; + $sto->set_server_setting("fsck_start_maxlogid", $logid); + $sto->set_server_setting("fsck_logid_processed", $logid); + }; + if ($@) { + error("DB error in _do_fsck_reset: $@"); + return 0; } - my $logid = $sto->max_fsck_logid; - $sto->set_server_setting("fsck_start_maxlogid", $logid); - $sto->set_server_setting("fsck_logid_processed", $logid); + return 1; } sub cmd_fsck_clearlog { @@ -1446,20 +1663,23 @@ sub cmd_rebalance_start { return $self->err_line("rebal_running", "rebalance is already running") if $rebal_host; return $self->err_line("fsck_running", "fsck running; cannot run rebalance at same time") if $fsck_host; - my $rebal_pol = MogileFS::Config->server_setting('rebal_policy'); - return $self->err_line('no_rebal_policy') unless $rebal_pol; - - my $rebal = MogileFS::Rebalance->new; - $rebal->policy($rebal_pol); - my @devs = MogileFS::Device->devices; - $rebal->init(\@devs); - my $sdevs = $rebal->source_devices; - - my $state = $rebal->save_state; - MogileFS::Config->set_server_setting('rebal_state', $state); + my $rebal_state = MogileFS::Config->server_setting('rebal_state'); + unless ($rebal_state) { + my $rebal_pol = MogileFS::Config->server_setting('rebal_policy'); + return $self->err_line('no_rebal_policy') unless $rebal_pol; + + my $rebal = MogileFS::Rebalance->new; + $rebal->policy($rebal_pol); + my @devs = Mgd::device_factory()->get_all; + $rebal->init(\@devs); + my $sdevs = $rebal->source_devices; + + $rebal_state = $rebal->save_state; + MogileFS::Config->set_server_setting('rebal_state', $rebal_state); + } # TODO: register start time somewhere. 
MogileFS::Config->set_server_setting('rebal_host', MogileFS::Config->hostname); - return $self->ok_line({ state => $state }); + return $self->ok_line({ state => $rebal_state }); } sub cmd_rebalance_test { @@ -1469,7 +1689,7 @@ sub cmd_rebalance_test { return $self->err_line('no_rebal_policy') unless $rebal_pol; my $rebal = MogileFS::Rebalance->new; - my @devs = MogileFS::Device->devices; + my @devs = Mgd::device_factory()->get_all; $rebal->policy($rebal_pol); $rebal->init(\@devs); @@ -1485,14 +1705,23 @@ sub cmd_rebalance_test { return $self->ok_line($ret); } +sub cmd_rebalance_reset { + my MogileFS::Worker::Query $self = shift; + my $host = MogileFS::Config->server_setting('rebal_host'); + if ($host) { + return $self->err_line("rebal_running", "rebalance is running") if $host; + } + MogileFS::Config->set_server_setting('rebal_state', undef); + return $self->ok_line; +} + sub cmd_rebalance_stop { my MogileFS::Worker::Query $self = shift; my $host = MogileFS::Config->server_setting('rebal_host'); unless ($host) { return $self->err_line('rebal_not_started'); } - # TODO: put stop time somewhere. - MogileFS::Config->set_server_setting('rebal_host', undef); + MogileFS::Config->set_server_setting('rebal_signal', 'stop'); return $self->ok_line; } @@ -1513,6 +1742,7 @@ sub cmd_rebalance_set_policy { } MogileFS::Config->set_server_setting('rebal_policy', $args->{policy}); + MogileFS::Config->set_server_setting('rebal_state', undef); return $self->ok_line; } @@ -1528,6 +1758,7 @@ sub ok_line { my $id = defined $self->{reqid} ? "$self->{reqid} " : ''; my $args = shift || {}; + $args->{callid} = $self->{callid} if defined $self->{callid}; my $argline = join('&', map { eurl($_) . "=" . 
eurl($args->{$_}) } keys %$args); $self->send_to_parent("${id}${delay}OK $argline"); return 1; @@ -1556,12 +1787,12 @@ sub err_line { 'host_mismatch' => "The device specified doesn't belong to the host specified", 'host_not_empty' => "Unable to delete host; it contains devices still", 'host_not_found' => "Host not found", - 'invalid_chars' => "Patterns must not contain backslashes (\\) or percent signs (%).", 'invalid_checker_level' => "Checker level invalid. Please see documentation on this command.", 'invalid_mindevcount' => "The mindevcount must be at least 1", 'key_exists' => "Target key name already exists; can't overwrite.", 'no_class' => "No class provided", 'no_devices' => "No devices found to store file", + 'no_device' => "Device not found", 'no_domain' => "No domain provided", 'no_host' => "No host provided", 'no_ip' => "IP required to create host", @@ -1577,17 +1808,23 @@ sub err_line { 'rebal_not_started' => "Rebalance not running", 'no_rebal_state' => "No available rebalance status", 'no_rebal_policy' => "No rebalance policy available", + 'nodel_default_class' => "Cannot delete the default class", }->{$err_code} || $err_code; my $delay = ''; if ($self->{querystarttime}) { $delay = sprintf("%.4f ", Time::HiRes::tv_interval($self->{querystarttime})); $self->{querystarttime} = undef; + } else { + # don't send another ERR line if we already sent one + error("err_line called redundantly with $err_code ( " . eurl($err_text) . ")"); + return 0; } my $id = defined $self->{reqid} ? "$self->{reqid} " : ''; + my $callid = defined $self->{callid} ? ' ' . eurl($self->{callid}) : ''; - $self->send_to_parent("${id}${delay}ERR $err_code " . eurl($err_text)); + $self->send_to_parent("${id}${delay}ERR $err_code " . eurl($err_text) . 
$callid); return 0; } diff --git a/lib/MogileFS/Worker/Reaper.pm b/lib/MogileFS/Worker/Reaper.pm index d3f1071d..1251516f 100644 --- a/lib/MogileFS/Worker/Reaper.pm +++ b/lib/MogileFS/Worker/Reaper.pm @@ -3,8 +3,14 @@ package MogileFS::Worker::Reaper; use strict; use base 'MogileFS::Worker'; -use MogileFS::Util qw(every error debug); +use MogileFS::Server; +use MogileFS::Util qw(error debug); use MogileFS::Config qw(DEVICE_SUMMARY_CACHE_TIMEOUT); +use constant REAP_INTERVAL => 5; +use constant REAP_BACKOFF_MIN => 60; + +# completely forget about devices we've reaped after 2 hours of idleness +use constant REAP_BACKOFF_MAX => 7200; sub new { my ($class, $psock) = @_; @@ -18,58 +24,164 @@ sub watchdog_timeout { return 240; } -my %all_empty; # devid -> bool, if all empty of files in file_on +# order is important here: +# +# first, add fid to file_to_replicate table. it +# shouldn't matter if the replicator gets to this +# before the subsequent 'forget_about' method, as the +# replicator will treat dead file_on devices as +# non-existent anyway. however, it is important that +# we enqueue it for replication first, before we +# forget about that file_on row, otherwise a failure +# after/during 'forget_about' could leave a stranded +# file on a dead device and we'd never fix it. 
+sub reap_fid { + my ($self, $fid, $dev) = @_; + + $fid->enqueue_for_replication(in => 1); + $dev->forget_about($fid); +} + +# this returns 1000 by default +sub reaper_inject_limit { + my ($self) = @_; + + my $sto = Mgd::get_store(); + my $max = MogileFS::Config->server_setting_cached('queue_size_for_reaper'); + my $limit = MogileFS::Config->server_setting_cached('queue_rate_for_reaper') || 1000; + + # max defaults to zero, meaning we inject $limit every wakeup + if ($max) { + # if a queue size limit is configured for reaper, prevent too many + # files from entering the repl queue: + my $len = $sto->deferred_repl_queue_length; + my $space_left = $max - $len; + + $limit = $space_left if ($limit > $space_left); + + # limit may end up being negative here since other processes + # can inject into the deferred replication queue, reaper is + # the only one which can respect this queue size + $limit = 0 if $limit < 0; + } + + return $limit; +} + +# we pass the $devid here (instead of a Device object) to avoid +# potential memory leaks since this sub reschedules itself to run +# forever. $delay is the current delay we were scheduled at +sub reap_dev { + my ($self, $devid, $delay) = @_; + + # ensure the master DB is up, retry in REAP_INTERVAL if down + unless ($self->validate_dbh) { + $delay = REAP_INTERVAL; + Danga::Socket->AddTimer($delay, sub { $self->reap_dev($devid, $delay) }); + return; + } + + my $limit = $self->reaper_inject_limit; + + # just in case a user mistakenly nuked a devid from the device table: + my $dev = Mgd::device_factory()->get_by_id($devid); + unless ($dev) { + error("No device row for dev$devid, cannot reap"); + $delay = undef; + } + + # user resurrected a "dead" device, not supported, really... 
+ if (!$dev->dstate->is_perm_dead) { + Mgd::log("dev$devid is no longer dead to reaper"); + return; + } + + # limit == 0 if we hit the queue size limit, we'll just reschedule + if ($limit && $dev) { + my $sto = Mgd::get_store(); + my $lock = "mgfs:reaper"; + my $lock_timeout = $self->watchdog_timeout / 4; + my @fids; + + if ($sto->get_lock($lock, $lock_timeout)) { + @fids = $dev->fid_list(limit => $limit); + if (@fids) { + $self->still_alive; + foreach my $fid (@fids) { + $self->reap_fid($fid, $dev); + } + } + $sto->release_lock($lock); + # if we've found any FIDs (perhaps even while backing off) + # ensure we try to find more soon: + if (@fids) { + $delay = REAP_INTERVAL; + } else { + $delay = $self->reap_dev_backoff_delay($delay); + } + } else { + # No lock after a long lock_timeout? Try again soon. + # We should never get here under MySQL, and rarely for other DBs. + debug("get_lock($lock, $lock_timeout) failed"); + $delay = REAP_INTERVAL; + } + } + + return unless defined $delay; + + # schedule another update, delay could be REAP_BACKOFF_MAX + Danga::Socket->AddTimer($delay, sub { $self->reap_dev($devid, $delay) }); +} + +# called when we're hopefully all done with a device, but reschedule +# into the future in case the replicator had an out-of-date cache and the +# "dead" device was actually writable. +sub reap_dev_backoff_delay { + my ($self, $delay) = @_; + + return REAP_BACKOFF_MIN if ($delay < REAP_BACKOFF_MIN); + + $delay *= 2; + return $delay > REAP_BACKOFF_MAX ? 
undef : $delay; +} + +# looks for dead devices sub work { my $self = shift; - every(5, sub { - $self->parent_ping; + # ensure we get monitor updates + Danga::Socket->AddOtherFds($self->psock_fd, sub{ $self->read_from_parent }); + my %devid_seen; + my $reap_check; + $reap_check = sub { # get db and note we're starting a run + $self->parent_ping; debug("Reaper running; looking for dead devices"); foreach my $dev (grep { $_->dstate->is_perm_dead } - MogileFS::Device->devices) + Mgd::device_factory()->get_all) { - my $devid = $dev->id; - next if $all_empty{$devid}; + next if $devid_seen{$dev->id}; - my @fids = $dev->fid_list(limit => 1000); - unless (@fids) { - $all_empty{$devid} = 1; - next; - } - $self->parent_ping; - - foreach my $fid (@fids) { - # order is important here: - - # first, add fid to file_to_replicate table. it - # shouldn't matter if the replicator gets to this - # before the subsequent 'forget_about' method, as the - # replicator will treat dead file_on devices as - # non-existent anyway. however, it is important that - # we enqueue it for replication first, before we - # forget about that file_on row, otherwise a failure - # after/during 'forget_about' could leave a stranded - # file on a dead device and we'd never fix it. 
- # - # and just for extra safety, in case replication happened - # on another machine after 'enqueue_for_replication' but - # before 'forget_about', and that other machine hadn't yet - # re-read the device table to learn that this device - # was dead, we delay the replication for the amount of time - # that the device summary table is valid for (presumably - # the other trackers are running identical software, or - # at least have the same timeout value) - - $fid->enqueue_for_replication(in => DEVICE_SUMMARY_CACHE_TIMEOUT + 1); - $dev->forget_about($fid); - $fid->update_devcount; - } + # delay the initial device reap in case any replicator cache + # thinks the device is still alive + Danga::Socket->AddTimer(DEVICE_SUMMARY_CACHE_TIMEOUT + 1, sub { + $self->reap_dev($dev->id, REAP_INTERVAL); + }); + + # once we've seen a device, reap_dev will takeover scheduling + # reaping for the given device. + $devid_seen{$dev->id} = 1; } - }); + + Danga::Socket->AddTimer(REAP_INTERVAL, $reap_check); + }; + + # kick off the reaper and loop forever + $reap_check->(); + Danga::Socket->EventLoop; } 1; diff --git a/lib/MogileFS/Worker/Replicate.pm b/lib/MogileFS/Worker/Replicate.pm index f1c686b0..d0a0ffb5 100644 --- a/lib/MogileFS/Worker/Replicate.pm +++ b/lib/MogileFS/Worker/Replicate.pm @@ -8,17 +8,12 @@ use fields ( ); use List::Util (); -use MogileFS::Util qw(error every debug); +use MogileFS::Server; +use MogileFS::Util qw(error every debug wait_for_readability); use MogileFS::Config; -use MogileFS::Class; -use MogileFS::RebalancePolicy::DrainDevices; use MogileFS::ReplicationRequest qw(rr_upgrade); - -# setup the value used in a 'nexttry' field to indicate that this item will never -# actually be tried again and require some sort of manual intervention. 
-use constant ENDOFTIME => 2147483647; - -sub end_of_time { ENDOFTIME; } +use Digest; +use MIME::Base64 qw(encode_base64); sub new { my ($class, $psock) = @_; @@ -30,41 +25,23 @@ sub new { # replicator wants sub watchdog_timeout { 90; } +use constant SOCK_TIMEOUT => 45; sub work { my $self = shift; - # give the monitor job 15 seconds to give us an update - my $warn_after = time() + 15; - - every(2.0, sub { - # replication doesn't go well if the monitor job hasn't actively started - # marking things as being available - unless ($self->monitor_has_run) { - error("waiting for monitor job to complete a cycle before beginning replication") - if time() > $warn_after; - return; - } - - $self->validate_dbh; - my $dbh = $self->get_dbh or return 0; - my $sto = Mgd::get_store(); - + every(1.0, sub { $self->send_to_parent("worker_bored 100 replicate rebalance"); - # This is here on account of not being able to block on the parent :( - $self->read_from_parent(1); - # TODO: might need to sort types or create priority queues in the - # parent... would want "replicate" work to happen before rebalance. + my $queue_todo = $self->queue_todo('replicate'); my $queue_todo2 = $self->queue_todo('rebalance'); - unless (@$queue_todo || @$queue_todo2) { - $self->parent_ping; - return; - } + return unless (@$queue_todo || @$queue_todo2); + + return unless $self->validate_dbh; + my $sto = Mgd::get_store(); while (my $todo = shift @$queue_todo) { my $fid = $todo->{fid}; - $self->still_alive; $self->replicate_using_torepl_table($todo); } while (my $todo = shift @$queue_todo2) { @@ -81,11 +58,7 @@ sub work { # manually re-run rebalance to retry. $sto->delete_fid_from_file_to_queue($todo->{fid}, REBAL_QUEUE); } - # if replicators are otherwise idle, use them to make the world - # better, rebalancing things (if enabled), and draining devices (if - # any are marked drain) - #$self->rebalance_devices; - #$self->drain_devices; + $_[0]->(0); # don't sleep. 
}); } @@ -162,7 +135,7 @@ sub replicate_using_torepl_table { # special; update to a time that won't happen again, # as we've encountered a scenario in which case we're # really hosed - $sto->reschedule_file_to_replicate_absolute($fid, ENDOFTIME); + $sto->reschedule_file_to_replicate_absolute($fid, $sto->end_of_time); } elsif ($type eq "offset") { $sto->reschedule_file_to_replicate_relative($fid, $delay+0); } else { @@ -191,16 +164,22 @@ sub replicate_using_torepl_table { my $devfid; # First one we can delete from, we try to rebalance away from. for (@devs) { - my $dev = MogileFS::Device->of_devid($_); - # Not positive 'can_read_from' needs to be here. + my $dev = Mgd::device_factory()->get_by_id($_); + # Not positive 'should_read_from' needs to be here. # We must be able to delete off of this dev so the fid can # move. - if ($dev->can_delete_from && $dev->can_read_from) { + if ($dev->can_delete_from && $dev->should_read_from) { $devfid = MogileFS::DevFID->new($dev, $f); last; } } - $self->rebalance_devfid($devfid) if $devfid; + if ($devfid) { + if ($self->rebalance_devfid($devfid)) { + # disable exponential backoff below if we rebalanced due to + # excessive replication: + $todo->{failcount} = 0; + } + } } # at this point, the rest of the errors require exponential backoff. define what this means @@ -212,9 +191,10 @@ sub replicate_using_torepl_table { return 1; } -# Return 1 on success, 0 on failure. +# Return 1 on success, 0 on failure or no-op. sub rebalance_devfid { my ($self, $devfid, $opts) = @_; + $opts ||= {}; MogileFS::Util::okay_args($opts, qw(avoid_devids target_devids)); my $fid = $devfid->fid; @@ -300,7 +280,7 @@ sub rebalance_devfid { } $unlock->(); - return 1; + return $should_delete; } # replicates $fid to make sure it meets its class' replicate policy. 
@@ -325,15 +305,14 @@ sub replicate { my $errref = delete $opts{'errref'}; my $no_unlock = delete $opts{'no_unlock'}; - my $sdevid = delete $opts{'source_devid'}; + my $fixed_source = delete $opts{'source_devid'}; my $mask_devids = delete $opts{'mask_devids'} || {}; my $avoid_devids = delete $opts{'avoid_devids'} || {}; - my $target_devids = delete $opts{'target_devids'}; # inverse of avoid_devids. + my $target_devids = delete $opts{'target_devids'} || []; # inverse of avoid_devids. die "unknown_opts" if %opts; die unless ref $mask_devids eq "HASH"; - # bool: if source was explicitly requested by caller - my $fixed_source = $sdevid ? 1 : 0; + my $sdevid; my $sto = Mgd::get_store(); my $unlock = sub { @@ -370,7 +349,7 @@ sub replicate { }; # hashref of devid -> MogileFS::Device - my $devs = MogileFS::Device->map + my $devs = Mgd::device_factory()->map_by_id or die "No device map"; return $retunlock->(0, "failed_getting_lock", "Unable to obtain lock for fid $fidid") @@ -389,13 +368,13 @@ sub replicate { my @on_up_devid; # subset of @on_devs: just devs that are readable foreach my $devid ($fid->devids) { - my $d = MogileFS::Device->of_devid($devid) + my $d = Mgd::device_factory()->get_by_id($devid) or next; push @on_devs, $d; if ($d->dstate->should_have_files && ! $mask_devids->{$devid}) { push @on_devs_tellpol, $d; } - if ($d->dstate->can_read_from) { + if ($d->should_read_from) { push @on_up_devid, $devid; } } @@ -403,9 +382,8 @@ sub replicate { return $retunlock->(0, "no_source", "Source is no longer available replicating $fidid") if @on_devs == 0; return $retunlock->(0, "source_down", "No alive devices available replicating $fidid") if @on_up_devid == 0; - # if they requested a specific source, that source must be up. - if ($sdevid && ! grep { $_ == $sdevid} @on_up_devid) { - return $retunlock->(0, "source_down", "Requested replication source device $sdevid not available"); + if ($fixed_source && ! 
grep { $_ == $fixed_source } @on_up_devid) { + error("Fixed source dev$fixed_source requested for $fidid but not available. Trying other devices"); } my %dest_failed; # devid -> 1 for each devid we were asked to copy to, but failed. @@ -414,7 +392,7 @@ sub replicate { my $copy_err; my $dest_devs = $devs; - if ($target_devids) { + if (@$target_devids) { $dest_devs = {map { $_ => $devs->{$_} } @$target_devids}; } @@ -484,24 +462,32 @@ sub replicate { } # find where we're replicating from - unless ($fixed_source) { + { # TODO: use an observed good device+host as source to start. my @choices = grep { ! $source_failed{$_} } @on_up_devid; return $retunlock->(0, "source_down", "No devices available replicating $fidid") unless @choices; - @choices = List::Util::shuffle(@choices); - MogileFS::run_global_hook('replicate_order_final_choices', $devs, \@choices); - $sdevid = shift @choices; + if ($fixed_source && grep { $_ == $fixed_source } @choices) { + $sdevid = $fixed_source; + } else { + @choices = List::Util::shuffle(@choices); + MogileFS::run_global_hook('replicate_order_final_choices', $devs, \@choices); + $sdevid = shift @choices; + } } my $worker = MogileFS::ProcManager->is_child or die; + my $digest; + my $fid_checksum = $fid->checksum; + $digest = Digest->new($fid_checksum->hashname) if $fid_checksum; + $digest ||= Digest->new($cls->hashname) if $cls->hashtype; + my $rv = http_copy( sdevid => $sdevid, ddevid => $ddevid, - fid => $fidid, - rfid => $fid, - expected_len => undef, # FIXME: get this info to pass along + fid => $fid, errref => \$copy_err, callback => sub { $worker->still_alive; }, + digest => $digest, ); die "Bogus error code: $copy_err" if !$rv && $copy_err !~ /^(?:src|dest)_error$/; @@ -510,10 +496,8 @@ sub replicate { if ($copy_err eq "src_error") { $source_failed{$sdevid} = 1; - if ($fixed_source) { - # there can't be any more retries, as this source - # is busted and is the only one we wanted. 
- return $retunlock->(0, "copy_error", "error copying fid $fidid from devid $sdevid during replication"); + if ($fixed_source && $fixed_source == $sdevid) { + error("Fixed source dev$fixed_source was requested for $fidid but failed: will try other sources"); } } else { @@ -524,6 +508,9 @@ sub replicate { my $dfid = MogileFS::DevFID->new($ddevid, $fid); $dfid->add_to_db; + if ($digest && !$fid->checksum) { + $sto->set_checksum($fidid, $cls->hashtype, $digest->digest); + } push @on_devs, $devs->{$ddevid}; push @on_devs_tellpol, $devs->{$ddevid}; @@ -544,47 +531,106 @@ sub replicate { "replication policy ran out of suggestions for us replicating fid $fidid"); } +# Returns a hashref with the following: +# { +# code => HTTP status code integer, +# keep => boolean, whether to keep the connection after reading +# len => value of the Content-Length header (integer) +# } +# Returns undef on timeout +sub read_headers { + my ($sock, $intercopy_cb) = @_; + my $head = ''; + + do { + wait_for_readability(fileno($sock), SOCK_TIMEOUT) or return; + $intercopy_cb->(); + my $r = sysread($sock, $head, 1024, length($head)); + if (defined $r) { + return if $r == 0; # EOF + } elsif ($!{EAGAIN} || $!{EINTR}) { + # loop again + } else { + return; + } + } until ($head =~ /\r?\n\r?\n/); + + my $data; + ($head, $data) = split(/\r?\n\r?\n/, $head, 2); + my @head = split(/\r?\n/, $head); + $head = shift(@head); + $head =~ m!\AHTTP/(\d+\.\d+)\s+(\d+)! 
or return; + my %rv = ( keep => $1 >= 1.1, code => $2 ); + + foreach my $line (@head) { + if ($line =~ /\AConnection:\s*keep-alive\s*\z/is) { + $rv{keep} = 1; + } elsif ($line =~ /\AConnection:\s*close\s*\z/is) { + $rv{keep} = 0; + } elsif ($line =~ /\AContent-Length:\s*(\d+)\s*\z/is) { + $rv{len} = $1; + } + } + return (\%rv, $data); +} + # copies a file from one Perlbal to another utilizing HTTP sub http_copy { my %opts = @_; - my ($sdevid, $ddevid, $fid, $rfid, $expected_clen, $intercopy_cb, $errref) = + my ($sdevid, $ddevid, $fid, $intercopy_cb, $errref, $digest) = map { delete $opts{$_} } qw(sdevid ddevid fid - rfid - expected_len callback errref + digest ); die if %opts; + $fid = MogileFS::FID->new($fid) unless ref($fid); + my $fidid = $fid->id; + my $expected_clen = $fid->length; + my $clen; + my $content_md5 = ''; + my ($sconn, $dconn); + my $fid_checksum = $fid->checksum; + if ($fid_checksum && $fid_checksum->hashname eq "MD5") { + # some HTTP servers may be able to verify Content-MD5 on PUT + # and reject corrupted requests. 
no HTTP server should reject + # a request for an unrecognized header + my $b64digest = encode_base64($fid_checksum->{checksum}, ""); + $content_md5 = "\r\nContent-MD5: $b64digest"; + } $intercopy_cb ||= sub {}; + my $err_common = sub { + my ($err, $msg) = @_; + $$errref = $err if $errref; + $sconn->close($err) if $sconn; + $dconn->close($err) if $dconn; + return error($msg); + }; + # handles setting unreachable magic; $error->(reachability, "message") my $error_unreachable = sub { - $$errref = "src_error" if $errref; - return error("Fid $fid unreachable while replicating: $_[0]"); + return $err_common->("src_error", "Fid $fidid unreachable while replicating: $_[0]"); }; my $dest_error = sub { - $$errref = "dest_error" if $errref; - error($_[0]); - return 0; + return $err_common->("dest_error", $_[0]); }; my $src_error = sub { - $$errref = "src_error" if $errref; - error($_[0]); - return 0; + return $err_common->("src_error", $_[0]); }; # get some information we'll need - my $sdev = MogileFS::Device->of_devid($sdevid); - my $ddev = MogileFS::Device->of_devid($ddevid); + my $sdev = Mgd::device_factory()->get_by_id($sdevid); + my $ddev = Mgd::device_factory()->get_by_id($ddevid); - return error("Error: unable to get device information: source=$sdevid, destination=$ddevid, fid=$fid") - unless $sdev && $ddev && $sdev->exists && $ddev->exists; + return error("Error: unable to get device information: source=$sdevid, destination=$ddevid, fid=$fidid") + unless $sdev && $ddev; my $s_dfid = MogileFS::DevFID->new($sdev, $fid); my $d_dfid = MogileFS::DevFID->new($ddev, $fid); @@ -599,99 +645,201 @@ sub http_copy { my ($dhostip, $dport) = ($dhost->ip, $dhost->http_port); unless (defined $spath && defined $dpath && defined $shostip && defined $dhostip && $sport && $dport) { # show detailed information to find out what's not configured right - error("Error: unable to replicate file fid=$fid from device id $sdevid to device id $ddevid"); + error("Error: unable to replicate file 
fid=$fidid from device id $sdevid to device id $ddevid"); error(" http://$shostip:$sport$spath -> http://$dhostip:$dport$dpath"); return 0; } + my $put = "PUT $dpath HTTP/1.0\r\nConnection: keep-alive\r\n" . + "Content-length: $expected_clen$content_md5\r\n\r\n"; + # need by webdav servers, like lighttpd... $ddev->vivify_directories($d_dfid->url); - # setup our pipe error handler, in case we get closed on - my $pipe_closed = 0; - local $SIG{PIPE} = sub { $pipe_closed = 1; }; - # call a hook for odd casing completely different source data # for specific files. my $shttphost; MogileFS::run_global_hook('replicate_alternate_source', - $rfid, \$shostip, \$sport, \$spath, \$shttphost); + $fid, \$shostip, \$sport, \$spath, \$shttphost); + my $durl = "http://$dhostip:$dport$dpath"; + my $surl = "http://$shostip:$sport$spath"; # okay, now get the file - my $sock = IO::Socket::INET->new(PeerAddr => $shostip, PeerPort => $sport, Timeout => 2) + my %sopts = ( ip => $shostip, port => $sport ); + + my $get = "GET $spath HTTP/1.0\r\nConnection: keep-alive\r\n"; + # plugin set a custom host. + $get .= "Host: $shttphost\r\n" if $shttphost; + + my ($sock, $dsock); + my ($wcount, $bytes_to_read, $written, $remain); + my ($stries, $dtries) = (0, 0); + my ($sres, $data, $bytes); + +retry: + $sconn->close("retrying") if $sconn; + $dconn->close("retrying") if $dconn; + $dconn = undef; + $stries++; + $sconn = $shost->http_conn_get(\%sopts) or return $src_error->("Unable to create source socket to $shostip:$sport for $spath"); - unless ($shttphost) { - $sock->write("GET $spath HTTP/1.0\r\n\r\n"); - } else { - # plugin set a custom host. 
- $sock->write("GET $spath HTTP/1.0\r\nHost: $shttphost\r\n\r\n"); + $sock = $sconn->sock; + unless ($sock->write("$get\r\n")) { + goto retry if $sconn->retryable && $stries == 1; + return $src_error->("Pipe closed retrieving $spath from $shostip:$sport"); } - return error("Pipe closed retrieving $spath from $shostip:$sport") - if $pipe_closed; # we just want a content length - my $clen; - # FIXME: this can block. needs to timeout. - while (defined (my $line = <$sock>)) { - $line =~ s/[\s\r\n]+$//; - last unless length $line; - if ($line =~ m!^HTTP/\d+\.\d+\s+(\d+)!) { - # make sure we get a good response - return $error_unreachable->("Error: Resource http://$shostip:$sport$spath failed: HTTP $1") - unless $1 >= 200 && $1 <= 299; - } - next unless $line =~ /^Content-length:\s*(\d+)\s*$/i; - $clen = $1; + ($sres, $data) = read_headers($sock, $intercopy_cb); + unless ($sres) { + goto retry if $sconn->retryable && $stries == 1; + return $error_unreachable->("Error: Resource $surl failed to return an HTTP response"); + } + unless ($sres->{code} >= 200 && $sres->{code} <= 299) { + return $error_unreachable->("Error: Resource $surl failed: HTTP $sres->{code}"); } + $clen = $sres->{len}; + return $error_unreachable->("File $spath has unexpected content-length of $clen, not $expected_clen") - if defined $expected_clen && $clen != $expected_clen; + if $clen != $expected_clen; # open target for put - my $dsock = IO::Socket::INET->new(PeerAddr => $dhostip, PeerPort => $dport, Timeout => 2) + $dtries++; + $dconn = $dhost->http_conn_get or return $dest_error->("Unable to create dest socket to $dhostip:$dport for $dpath"); - $dsock->write("PUT $dpath HTTP/1.0\r\nContent-length: $clen\r\n\r\n") - or return $dest_error->("Unable to write data to $dpath on $dhostip:$dport"); - return $dest_error->("Pipe closed during write to $dpath on $dhostip:$dport") - if $pipe_closed; + $dsock = $dconn->sock; + + unless ($dsock->write($put)) { + goto retry if $dconn->retryable && $dtries == 1; 
+ return $dest_error->("Pipe closed during write to $dpath on $dhostip:$dport"); + } # now read data and print while we're reading. - my ($data, $written, $remain) = ('', 0, $clen); - my $bytes_to_read = 1024*1024; # read 1MB at a time until there's less than that remaining + $bytes = length($data); + ($written, $remain) = (0, $clen); + $bytes_to_read = 1024*1024; # read 1MB at a time until there's less than that remaining $bytes_to_read = $remain if $remain < $bytes_to_read; - my $finished_read = 0; - - if ($bytes_to_read) { - while (!$pipe_closed && (my $bytes = $sock->read($data, $bytes_to_read))) { - # now we've read in $bytes bytes - $remain -= $bytes; - $bytes_to_read = $remain if $remain < $bytes_to_read; - - my $wbytes = $dsock->send($data); - $written += $wbytes; - return $dest_error->("Error: wrote $wbytes; expected to write $bytes; failed putting to $dpath") - unless $wbytes == $bytes; + $wcount = 0; + + while ($bytes_to_read) { + unless (defined $bytes) { +read_again: + $bytes = sysread($sock, $data, $bytes_to_read); + unless (defined $bytes) { + if ($!{EAGAIN} || $!{EINTR}) { + wait_for_readability(fileno($sock), SOCK_TIMEOUT) and + goto read_again; + } + return $src_error->("error reading midway through source: $!"); + } + if ($bytes == 0) { + return $src_error->("EOF reading midway through source: $!"); + } + } + + # now we've read in $bytes bytes + $remain -= $bytes; + $bytes_to_read = $remain if $remain < $bytes_to_read; + $digest->add($data) if $digest; + + my $data_len = $bytes; + $bytes = undef; + my $data_off = 0; + while (1) { + my $wbytes = syswrite($dsock, $data, $data_len, $data_off); + unless (defined $wbytes) { + # it can take two writes to determine if a socket is dead + # (TCP_NODELAY and TCP_CORK are (and must be) zero here) + goto retry if (!$wcount && $dconn->retryable && $dtries == 1); + return $dest_error->("Error: syswrite failed after $written bytes with: $!; failed putting to $dpath"); + } + $wcount++; + $written += $wbytes; 
$intercopy_cb->(); + last if ($data_len == $wbytes); - die if $bytes_to_read < 0; - next if $bytes_to_read; - $finished_read = 1; - last; + $data_len -= $wbytes; + $data_off += $wbytes; } + + die if $bytes_to_read < 0; + } + + # source connection drained, return to pool + if ($sres->{keep}) { + $shost->http_conn_put($sconn); + $sconn = undef; } else { - # 0 byte file copy. - $finished_read = 1; + $sconn->close("http_close"); + } + + # callee will want this digest, too, so clone as "digest" is destructive + $digest = $digest->clone->digest if $digest; + + if ($fid_checksum) { + if ($digest ne $fid_checksum->{checksum}) { + my $expect = $fid_checksum->hexdigest; + $digest = unpack("H*", $digest); + return $src_error->("checksum mismatch on GET: expected: $expect actual: $digest"); + } } - return $dest_error->("closed pipe writing to destination") if $pipe_closed; - return $src_error->("error reading midway through source: $!") unless $finished_read; # now read in the response line (should be first line) - my $line = <$dsock>; - if ($line =~ m!^HTTP/\d+\.\d+\s+(\d+)!) 
{ - return 1 if $1 >= 200 && $1 <= 299; - return $dest_error->("Got HTTP status code $1 PUTing to http://$dhostip:$dport$dpath"); + my ($dres, $ddata) = read_headers($dsock, $intercopy_cb); + unless ($dres) { + goto retry if (!$wcount && $dconn->retryable && $dtries == 1); + return $dest_error->("Error: HTTP response line not recognized writing to $durl"); + } + + # drain the response body if there is one + # there may be no dres->{len}/Content-Length if there is no body + my $dlen = ($dres->{len} || 0) - length($ddata); + if ($dlen > 0) { + my $r = $dsock->read($data, $dlen); # dres->{len} should be tiny + if (defined $r) { + if ($r != $dlen) { + Mgd::error("Failed to read $r of Content-Length:$dres->{len} bytes for PUT response on $durl"); + $dres->{keep} = 0; + } + } else { + Mgd::error("Failed to read Content-Length:$dres->{len} bytes for PUT response on $durl ($!)"); + $dres->{keep} = 0; + } + } elsif ($dlen < 0) { + Mgd::error("strange response Content-Length:$dres->{len} with ". + length($ddata) . 
+ " extra bytes for PUT response on $durl ($!)"); + $dres->{keep} = 0; + } + + # return the connection back to the connection pool + if ($dres->{keep}) { + $dhost->http_conn_put($dconn); + $dconn = undef; } else { - return $dest_error->("Error: HTTP response line not recognized writing to http://$dhostip:$dport$dpath: $line"); + $dconn->close("http_close"); + } + + if ($dres->{code} >= 200 && $dres->{code} <= 299) { + if ($digest) { + my $alg = ($fid_checksum && $fid_checksum->hashname) || $fid->class->hashname; + + if ($ddev->{reject_bad_md5} && ($alg eq "MD5")) { + # dest device would've rejected us with a error, + # no need to reread the file + return 1; + } + my $httpfile = MogileFS::HTTPFile->at($durl); + my $actual = $httpfile->digest($alg, $intercopy_cb); + if ($actual ne $digest) { + my $expect = unpack("H*", $digest); + $actual = unpack("H*", $actual); + return $dest_error->("checksum mismatch on PUT, expected: $expect actual: $digest"); + } + } + return 1; } + return $dest_error->("Got HTTP status code $dres->{code} PUTing to $durl"); } 1; diff --git a/lib/Mogstored/ChildProcess/IOStat.pm b/lib/Mogstored/ChildProcess/IOStat.pm index 15b6ec42..4ce5d8c8 100644 --- a/lib/Mogstored/ChildProcess/IOStat.pm +++ b/lib/Mogstored/ChildProcess/IOStat.pm @@ -4,6 +4,9 @@ use base 'Mogstored::ChildProcess'; my $docroot; +my $iostat_cmd = $ENV{MOG_IOSTAT_CMD} || "iostat -dx 1 30"; +if ($^O =~ /darwin/) { $iostat_cmd =~ s/x// } + sub pre_exec_init { my $class = shift; @@ -48,7 +51,7 @@ sub run { my $get_iostat_fh = sub { while (1) { - if ($iostat_pid = open (my $fh, "iostat -dx 1 30|")) { + if ($iostat_pid = open (my $fh, "$iostat_cmd|")) { return $fh; } # TODO: try and find other paths to iostat @@ -63,26 +66,21 @@ sub run { my $mog_sysid = mog_sysid_map(); # 5 (mogdevid) -> 2340 (os devid) my $dev_sysid = {}; # hashref, populated lazily: { /dev/sdg => system dev_t } my %devt_util; # dev_t => 52.55 - my $init = 0; + my $stats = 0; while (<$iofh>) { - if (m/^Device:/) { 
- %devt_util = (); - $init = 1; - next; - } - next unless $init; - if (m/^ (\S+) .*? ([\d.]+) \n/x) { + if (m/^\s*(\S+)\s.*?([\d.]+)\s*$/) { my ($devnode, $util) = ("/dev/$1", $2); unless (exists $dev_sysid->{$devnode}) { $dev_sysid->{$devnode} = (stat($devnode))[6]; # rdev } my $devt = $dev_sysid->{$devnode}; $devt_util{$devt} = $util; - next; - } - # blank line is the end. - if (m!^\s*\n!) { - $init = 0; + $stats++; + } elsif ($stats) { + # blank line is the end, or any other line we don't understand + # if we have stats, we print them, otherwise do nothing + $stats = 0; + my $ret = ""; foreach my $mogdevid (sort { $a <=> $b } keys %$mog_sysid) { my $devt = $mog_sysid->{$mogdevid}; @@ -93,7 +91,7 @@ sub run { print $ret; $check_for_parent->(); - next; + %devt_util = (); } } } diff --git a/lib/Mogstored/HTTPServer.pm b/lib/Mogstored/HTTPServer.pm index 5a8123a0..96661ff8 100644 --- a/lib/Mogstored/HTTPServer.pm +++ b/lib/Mogstored/HTTPServer.pm @@ -7,6 +7,7 @@ sub new { $self->{listen} = delete $opts{listen}; $self->{maxconns} = delete $opts{maxconns}; $self->{bin} = delete $opts{bin}; + $self->{include} = delete $opts{include}; die "unknown opts" if %opts; return $self; } diff --git a/lib/Mogstored/HTTPServer/Lighttpd.pm b/lib/Mogstored/HTTPServer/Lighttpd.pm index f0fd4270..76c492a3 100644 --- a/lib/Mogstored/HTTPServer/Lighttpd.pm +++ b/lib/Mogstored/HTTPServer/Lighttpd.pm @@ -41,6 +41,9 @@ sub start { my $portnum = $self->listen_port; my $bind_ip = $self->bind_ip; + + my $include_line = sprintf('include "%s"', $self->{include}) + if $self->{include}; print $fh qq{ server.document-root = "$self->{docroot}" @@ -49,6 +52,7 @@ server.bind = "$bind_ip" server.modules = ( "mod_webdav", "mod_status" ) webdav.activate = "enable" status.status-url = "/" +$include_line }; exec $exe, "-D", "-f", $filename; diff --git a/lib/Mogstored/HTTPServer/Nginx.pm b/lib/Mogstored/HTTPServer/Nginx.pm new file mode 100644 index 00000000..c3023867 --- /dev/null +++ 
b/lib/Mogstored/HTTPServer/Nginx.pm @@ -0,0 +1,202 @@ +package Mogstored::HTTPServer::Nginx; + +use strict; +use base 'Mogstored::HTTPServer'; +use File::Temp (); + +# returns an version number suitable for numeric comparison +sub ngx_version { + my ($major, $minor, $point) = @_; + return ($major << 16) + ($minor << 8) + $point; +} + +sub start { + my $self = shift; + my $exe = $self->{bin}; + + if ($exe && ! -x $exe) { + die "Provided nginx path $exe not valid.\n"; + } + unless ($exe) { + my @loc = qw(/usr/sbin/nginx + /usr/local/bin/nginx + /usr/bin/nginx + ); + foreach my $loc (@loc) { + $exe = $loc; + last if -x $exe; + } + unless (-x $exe) { + die "Can't find nginx in @loc\n"; + } + } + + # get meta-data about nginx binary + my $nginxMeta = `$exe -V 2>&1`; + my $ngxVersion = ngx_version(0,0,0); + if($nginxMeta =~ /nginx\/(\d+)\.(\d+)\.(\d+)/sog) { + $ngxVersion = ngx_version($1,$2,$3); + } + + # determine if nginx can be run in non-daemon mode, supported in $version >= 1.0.9 (non-daemon provides better shutdown/crash support) + # See: http://nginx.org/en/docs/faq/daemon_master_process_off.html + my $nondaemon = $ngxVersion >= ngx_version(1, 0, 9); + + # create tmp directory + my $tmpDir = $self->{docroot} . '/.tmp'; + mkdir $tmpDir; + mkdir $tmpDir.'/logs'; + + my $pidFile = $tmpDir . '/nginx.pid'; + + # fork if nginx supports non-daemon mode + if($nondaemon) { + my $pid = fork(); + die "Can't fork: $!" unless defined $pid; + + if ($pid) { + $self->{pid} = $pid; + Mogstored->on_pid_death($pid => sub { + die "nginx died"; + }); + return; + } + } + # otherwise, try killing previous instance of nginx + else { + my $nginxpid = _getpid($pidFile); + # TODO: Support reloading of nginx instead? 
+ if ($nginxpid) { + my $killed = kill 15,$nginxpid; + if ($killed > 0) { + print "Killed nginx on PID # $nginxpid"; + } + } + } + + my ($fh, $filename) = File::Temp::tempfile(); + $self->{temp_conf_file} = $filename; + + my $portnum = $self->listen_port; + my $bind_ip = $self->bind_ip; + + my $client_max_body_size = "0"; + $client_max_body_size = $self->{client_max_body_size} + if $self->{client_max_body_size}; + + # TODO: Pull from config file? + #print "client_max_body_size = $client_max_body_size\n"; + + my @devdirs = _disks($self->{docroot}); + my $devsection = ''; + + foreach my $devid (@devdirs) { + my $devseg = qq{ + location /$devid { + client_body_temp_path $self->{docroot}/$devid/.tmp; + dav_methods put delete; + dav_access user:rw group:rw all:r; + create_full_put_path on; + } + }; + $devsection .= $devseg; + } + + # determine which temp_path directives are required to isolate this instance of nginx + my $tempPath = "client_body_temp_path $tmpDir/client_body_temp;\n"; + unless($nginxMeta =~ /--without-http_fastcgi_module/sog) { + $tempPath .= "fastcgi_temp_path $tmpDir/fastcgi_temp;\n"; + } + unless($nginxMeta =~ /--without-http_proxy_module/sog) { + $tempPath .= "proxy_temp_path $tmpDir/proxy_temp;\n"; + } + + # Debian squeeze (stable as of 2013/01) is only on nginx 0.7.67 + + # uwsgi support appeared in nginx 0.8.40 + if ($ngxVersion >= ngx_version(0, 8, 40)) { + unless($nginxMeta =~ /--without-http_uwsgi_module/sog) { + $tempPath .= "uwsgi_temp_path $tmpDir/uwsgi_temp;\n"; + } + } + + # scgi support appeared in nginx 0.8.42 + if ($ngxVersion >= ngx_version(0, 8, 42)) { + unless($nginxMeta =~ /--without-http_scgi_module/sog) { + $tempPath .= "scgi_temp_path $tmpDir/scgi_temp;\n"; + } + } + + my $user = $> == 0 ? 
"user root root;" : ""; + + print $fh qq{ + pid $pidFile; + worker_processes 15; + error_log /dev/null crit; + $user + events { + worker_connections 1024; + } + http { + default_type application/octet-stream; + sendfile on; + keepalive_timeout 0; + client_max_body_size $client_max_body_size; + server_tokens off; + access_log off; + charset utf-8; + server { + listen $bind_ip:$portnum; + root $self->{docroot}; + + $devsection + location /.tmp { + deny all; + } + location / { + autoindex on; + } + } + + $tempPath + } + + lock_file $tmpDir/lock_file; + }; + close $fh; + + # start nginx + if($nondaemon) { + exec $exe, '-p', $tmpDir, '-g', 'daemon off;', '-c', $filename; + exit; + } + else { + my $retval = system $exe, '-p', $tmpDir, '-c', $filename; + die "nginx failed to start\n" if($retval != 0); + } + + return 1; +} + +sub _disks { + my $root = shift; + opendir(my $dh, $root) or die "Failed to open docroot: $root: $!"; + return grep { /^dev\d+$/ } readdir($dh); +} + +sub _getpid { + my ($nginxpidfile) = @_; + local $/ = undef; + open FILE, $nginxpidfile or return; + binmode FILE; + my $string = ; + close FILE; + return $string; +} + +sub DESTROY { + my $self = shift; + unlink $self->{temp_conf_file} if $self->{temp_conf_file}; +} + +1; diff --git a/lib/Mogstored/HTTPServer/Perlbal.pm b/lib/Mogstored/HTTPServer/Perlbal.pm index b9e2d760..17f33ae6 100644 --- a/lib/Mogstored/HTTPServer/Perlbal.pm +++ b/lib/Mogstored/HTTPServer/Perlbal.pm @@ -44,6 +44,7 @@ SERVER max_connections = $self->{maxconns} SET mogstored.enable_delete = 1 SET mogstored.min_put_directory = 1 SET mogstored.persist_client = 1 + SET mogstored.max_chunked_request_size = 0 ENABLE mogstored "; diff --git a/lib/Mogstored/SideChannelClient.pm b/lib/Mogstored/SideChannelClient.pm index 4397a2a9..e68421bf 100644 --- a/lib/Mogstored/SideChannelClient.pm +++ b/lib/Mogstored/SideChannelClient.pm @@ -8,6 +8,16 @@ use fields ( 'read_buf', # unprocessed read buffer 'mogsvc', # the mogstored Perlbal::Service 
object ); +use Digest; +use POSIX qw(O_RDONLY); +use Mogstored::TaskQueue; + +BEGIN { + eval { require IO::AIO; }; +} + +# TODO: interface to make this tunable +my %digest_queues; # needed since we're pretending to be a Perlbal::Socket... never idle out sub max_idle_time { return 0; } @@ -22,13 +32,26 @@ sub new { return $self; } +sub validate_uri { + my ($self, $uri) = @_; + if ($uri =~ /\.\./) { + $self->write("ERROR: uri invalid (contains ..)\r\n"); + return; + } + $uri; +} + sub event_read { my Mogstored::SideChannelClient $self = shift; my $bref = $self->read(1024); return $self->close unless defined $bref; $self->{read_buf} .= $$bref; + $self->read_buf_consume; +} +sub read_buf_consume { + my $self = shift; my $path = $self->{mogsvc}->{docroot}; while ($self->{read_buf} =~ s/^(.+?)\r?\n//) { @@ -37,12 +60,8 @@ sub event_read { # increase our count $self->{count}++; - # validate uri - my $uri = $1; - if ($uri =~ /\.\./) { - $self->write("ERROR: uri invalid (contains ..)\r\n"); - return; - } + my $uri = $self->validate_uri($1); + return unless defined($uri); # now stat the file to get the size and such Perlbal::AIO::aio_stat("$path$uri", sub { @@ -57,6 +76,15 @@ sub event_read { } $self->watch_read(0); Mogstored->iostat_subscribe($self); + } elsif ($cmd =~ /^(MD5|SHA-1) (\S+)(?: (\w+))?$/) { + # we can easily enable other hash algorithms with the above + # regexp, but we won't for now (see MogileFS::Checksum) + my $alg = $1; + my $uri = $self->validate_uri($2); + my $reason = $3; + return unless defined($uri); + + return $self->digest($alg, $path, $uri, $reason); } else { # we don't understand this so pass it on to manage command interface my @out; @@ -97,4 +125,79 @@ sub die_gracefully { Mogstored->on_sidechannel_die_gracefully; } +sub digest { + my ($self, $alg, $path, $uri, $reason) = @_; + + $self->watch_read(0); + + Perlbal::AIO::aio_open("$path$uri", O_RDONLY, 0, sub { + my $fh = shift; + eval { + IO::AIO::fadvise(fileno($fh), 0, 0, 
IO::AIO::FADV_SEQUENTIAL()); + }; + + if ($self->{closed}) { + CORE::close($fh) if $fh; + return; + } + if ($fh) { + my $queue; + + if ($reason && $reason eq "fsck") { + # fstat(2) should return immediately, no AIO needed + my $devid = (stat($fh))[0]; + $queue = $digest_queues{$devid} ||= Mogstored::TaskQueue->new; + $queue->run(sub { $self->digest_fh($alg, $fh, $uri, $queue) }); + } else { + $self->digest_fh($alg, $fh, $uri); + } + } else { + $self->write("$uri $alg=-1\r\n"); + $self->after_long_request; + } + }); +} + +sub digest_fh { + my ($self, $alg, $fh, $uri, $queue) = @_; + my $offset = 0; + my $data = ''; + my $digest = Digest->new($alg); + my $cb; + + $cb = sub { + my $retval = shift; + if ($retval > 0) { + my $bytes = length($data); + $offset += $bytes; + $digest->add($data); + Perlbal::AIO::aio_read($fh, $offset, 0x100000, $data, $cb); + } elsif ($retval == 0) { # EOF + $cb = undef; + CORE::close($fh); + $digest = $digest->hexdigest; + $self->write("$uri $alg=$digest\r\n"); + $queue->task_done if $queue; + $self->after_long_request; + } else { + $cb = undef; + CORE::close($fh); + $self->write("ERR read $uri at $offset failed\r\n"); + $queue->task_done if $queue; + $self->after_long_request; # should we try to continue? + } + }; + Perlbal::AIO::aio_read($fh, $offset, 0x100000, $data, $cb); +} + +sub after_long_request { + my $self = shift; + + if ($self->{read_buf} =~ /^(.+?)\r?\n/) { + $self->read_buf_consume; + } else { + $self->watch_read(1); + } +} + 1; diff --git a/lib/Mogstored/SideChannelListener.pm b/lib/Mogstored/SideChannelListener.pm index 0821951b..355662a7 100644 --- a/lib/Mogstored/SideChannelListener.pm +++ b/lib/Mogstored/SideChannelListener.pm @@ -2,6 +2,7 @@ package Mogstored::SideChannelListener; use strict; use base 'Perlbal::TCPListener'; use Mogstored::SideChannelClient; +use Socket qw(SO_KEEPALIVE); sub new { my ($class, $hostport) = @_; @@ -9,7 +10,9 @@ sub new { # exploding/warning. 
so we created this stub service above in our static # config, just for this. my $svc = Perlbal->service("mgmt") or die "Where is mgmt service?"; - return $class->SUPER::new($hostport, $svc); + my $self = $class->SUPER::new($hostport, $svc); + $self->{sock}->sockopt(SO_KEEPALIVE, 1); + return $self; } sub event_read { diff --git a/lib/Mogstored/TaskQueue.pm b/lib/Mogstored/TaskQueue.pm new file mode 100644 index 00000000..f25a2284 --- /dev/null +++ b/lib/Mogstored/TaskQueue.pm @@ -0,0 +1,42 @@ +# low priority task queue which limits jobs (currently MD5 digest requests) +package Mogstored::TaskQueue; +use fields ( + 'active', # number of active tasks + 'max', # maximum active tasks before deferring to pending + 'pending', # pending code refs for execution + ); + +sub new { + my Mogstored::TaskQueue $self = shift; + $self = fields::new($self) unless ref $self; + $self->{active} = 0; + $self->{max} = 1; + $self->{pending} = []; + $self; +} + +sub run { + my ($self, $task) = @_; + + if ($self->{active} < $self->{max}) { + $self->{active}++; + $task->(); + } else { + push @{$self->{pending}}, $task; + } +} + +sub task_done { + my $self = shift; + + $self->{active}--; + if ($self->{active} < $self->{max}) { + my $task = shift @{$self->{pending}}; + if ($task) { + $self->{active}++; + $task->(); + } + } +} + +1; diff --git a/lib/mogdeps/Danga/Socket.pm b/lib/mogdeps/Danga/Socket.pm deleted file mode 100644 index 9bebf542..00000000 --- a/lib/mogdeps/Danga/Socket.pm +++ /dev/null @@ -1,1437 +0,0 @@ -########################################################################### - -=head1 NAME - -Danga::Socket - Event loop and event-driven async socket base class - -=head1 SYNOPSIS - - package My::Socket - use Danga::Socket; - use base ('Danga::Socket'); - use fields ('my_attribute'); - - sub new { - my My::Socket $self = shift; - $self = fields::new($self) unless ref $self; - $self->SUPER::new( @_ ); - - $self->{my_attribute} = 1234; - return $self; - } - - sub event_err { ... 
} - sub event_hup { ... } - sub event_write { ... } - sub event_read { ... } - sub close { ... } - - $my_sock->tcp_cork($bool); - - # write returns 1 if all writes have gone through, or 0 if there - # are writes in queue - $my_sock->write($scalar); - $my_sock->write($scalarref); - $my_sock->write(sub { ... }); # run when previous data written - $my_sock->write(undef); # kick-starts - - # read max $bytecount bytes, or undef on connection closed - $scalar_ref = $my_sock->read($bytecount); - - # watch for writability. not needed with ->write(). write() - # will automatically turn on watch_write when you wrote too much - # and turn it off when done - $my_sock->watch_write($bool); - - # watch for readability - $my_sock->watch_read($bool); - - # if you read too much and want to push some back on - # readable queue. (not incredibly well-tested) - $my_sock->push_back_read($buf); # scalar or scalar ref - - Danga::Socket->AddOtherFds(..); - Danga::Socket->SetLoopTimeout($millisecs); - Danga::Socket->DescriptorMap(); - Danga::Socket->WatchedSockets(); # count of DescriptorMap keys - Danga::Socket->SetPostLoopCallback($code); - Danga::Socket->EventLoop(); - -=head1 DESCRIPTION - -This is an abstract base class for objects backed by a socket which -provides the basic framework for event-driven asynchronous IO, -designed to be fast. Danga::Socket is both a base class for objects, -and an event loop. - -Callers subclass Danga::Socket. Danga::Socket's constructor registers -itself with the Danga::Socket event loop, and invokes callbacks on the -object for readability, writability, errors, and other conditions. - -Because Danga::Socket uses the "fields" module, your subclasses must -too. - -=head1 MORE INFO - -For now, see servers using Danga::Socket for guidance. For example: -perlbal, mogilefsd, or ddlockd. - -=head1 API - -Note where "C" is used below, normally you would call these methods as: - - Danga::Socket->method(...); - -However using a subclass works too. 
- -The CLASS methods are all methods for the event loop part of Danga::Socket, -whereas the object methods are all used on your subclasses. - -=cut - -########################################################################### - -package Danga::Socket; -use strict; -use bytes; -use POSIX (); -use Time::HiRes (); - -my $opt_bsd_resource = eval "use BSD::Resource; 1;"; - -use vars qw{$VERSION}; -$VERSION = "1.61"; - -use warnings; -no warnings qw(deprecated); - -use Sys::Syscall qw(:epoll); - -use fields ('sock', # underlying socket - 'fd', # numeric file descriptor - 'write_buf', # arrayref of scalars, scalarrefs, or coderefs to write - 'write_buf_offset', # offset into first array of write_buf to start writing at - 'write_buf_size', # total length of data in all write_buf items - 'write_set_watch', # bool: true if we internally set watch_write rather than by a subclass - 'read_push_back', # arrayref of "pushed-back" read data the application didn't want - 'closed', # bool: socket is closed - 'corked', # bool: socket is corked - 'event_watch', # bitmask of events the client is interested in (POLLIN,OUT,etc.) - 'peer_v6', # bool: cached; if peer is an IPv6 address - 'peer_ip', # cached stringified IP address of $sock - 'peer_port', # cached port number of $sock - 'local_ip', # cached stringified IP address of local end of $sock - 'local_port', # cached port number of local end of $sock - 'writer_func', # subref which does writing. must return bytes written (or undef) and set $! on errors - ); - -use Errno qw(EINPROGRESS EWOULDBLOCK EISCONN ENOTSOCK - EPIPE EAGAIN EBADF ECONNRESET ENOPROTOOPT); -use Socket qw(IPPROTO_TCP); -use Carp qw(croak confess); - -use constant TCP_CORK => ($^O eq "linux" ? 
3 : 0); # FIXME: not hard-coded (Linux-specific too) -use constant DebugLevel => 0; - -use constant POLLIN => 1; -use constant POLLOUT => 4; -use constant POLLERR => 8; -use constant POLLHUP => 16; -use constant POLLNVAL => 32; - -our $HAVE_KQUEUE = eval { require IO::KQueue; 1 }; - -our ( - $HaveEpoll, # Flag -- is epoll available? initially undefined. - $HaveKQueue, - %DescriptorMap, # fd (num) -> Danga::Socket object - %PushBackSet, # fd (num) -> Danga::Socket (fds with pushed back read data) - $Epoll, # Global epoll fd (for epoll mode only) - $KQueue, # Global kqueue fd (for kqueue mode only) - @ToClose, # sockets to close when event loop is done - %OtherFds, # A hash of "other" (non-Danga::Socket) file - # descriptors for the event loop to track. - - $PostLoopCallback, # subref to call at the end of each loop, if defined (global) - %PLCMap, # fd (num) -> PostLoopCallback (per-object) - - $LoopTimeout, # timeout of event loop in milliseconds - $DoProfile, # if on, enable profiling - %Profiling, # what => [ utime, stime, calls ] - $DoneInit, # if we've done the one-time module init yet - @Timers, # timers - ); - -Reset(); - -##################################################################### -### C L A S S M E T H O D S -##################################################################### - -=head2 C<< CLASS->Reset() >> - -Reset all state - -=cut -sub Reset { - %DescriptorMap = (); - %PushBackSet = (); - @ToClose = (); - %OtherFds = (); - $LoopTimeout = -1; # no timeout by default - $DoProfile = 0; - %Profiling = (); - @Timers = (); - - $PostLoopCallback = undef; - %PLCMap = (); - $DoneInit = 0; - - POSIX::close($Epoll) if defined $Epoll && $Epoll >= 0; - POSIX::close($KQueue) if defined $KQueue && $KQueue >= 0; - - *EventLoop = *FirstTimeEventLoop; -} - -=head2 C<< CLASS->HaveEpoll() >> - -Returns a true value if this class will use IO::Epoll for async IO. 
- -=cut -sub HaveEpoll { - _InitPoller(); - return $HaveEpoll; -} - -=head2 C<< CLASS->WatchedSockets() >> - -Returns the number of file descriptors which are registered with the global -poll object. - -=cut -sub WatchedSockets { - return scalar keys %DescriptorMap; -} -*watched_sockets = *WatchedSockets; - -=head2 C<< CLASS->EnableProfiling() >> - -Turns profiling on, clearing current profiling data. - -=cut -sub EnableProfiling { - if ($opt_bsd_resource) { - %Profiling = (); - $DoProfile = 1; - return 1; - } - return 0; -} - -=head2 C<< CLASS->DisableProfiling() >> - -Turns off profiling, but retains data up to this point - -=cut -sub DisableProfiling { - $DoProfile = 0; -} - -=head2 C<< CLASS->ProfilingData() >> - -Returns reference to a hash of data in format: - - ITEM => [ utime, stime, #calls ] - -=cut -sub ProfilingData { - return \%Profiling; -} - -=head2 C<< CLASS->ToClose() >> - -Return the list of sockets that are awaiting close() at the end of the -current event loop. - -=cut -sub ToClose { return @ToClose; } - -=head2 C<< CLASS->OtherFds( [%fdmap] ) >> - -Get/set the hash of file descriptors that need processing in parallel with -the registered Danga::Socket objects. - -=cut -sub OtherFds { - my $class = shift; - if ( @_ ) { %OtherFds = @_ } - return wantarray ? %OtherFds : \%OtherFds; -} - -=head2 C<< CLASS->AddOtherFds( [%fdmap] ) >> - -Add fds to the OtherFds hash for processing. - -=cut -sub AddOtherFds { - my $class = shift; - %OtherFds = ( %OtherFds, @_ ); # FIXME investigate what happens on dupe fds - return wantarray ? %OtherFds : \%OtherFds; -} - -=head2 C<< CLASS->SetLoopTimeout( $timeout ) >> - -Set the loop timeout for the event loop to some value in milliseconds. - -A timeout of 0 (zero) means poll forever. A timeout of -1 means poll and return -immediately. 
- -=cut -sub SetLoopTimeout { - return $LoopTimeout = $_[1] + 0; -} - -=head2 C<< CLASS->DebugMsg( $format, @args ) >> - -Print the debugging message specified by the C-style I and -I - -=cut -sub DebugMsg { - my ( $class, $fmt, @args ) = @_; - chomp $fmt; - printf STDERR ">>> $fmt\n", @args; -} - -=head2 C<< CLASS->AddTimer( $seconds, $coderef ) >> - -Add a timer to occur $seconds from now. $seconds may be fractional, but timers -are not guaranteed to fire at the exact time you ask for. - -Returns a timer object which you can call C<< $timer->cancel >> on if you need to. - -=cut -sub AddTimer { - my $class = shift; - my ($secs, $coderef) = @_; - - my $fire_time = Time::HiRes::time() + $secs; - - my $timer = bless [$fire_time, $coderef], "Danga::Socket::Timer"; - - if (!@Timers || $fire_time >= $Timers[-1][0]) { - push @Timers, $timer; - return $timer; - } - - # Now, where do we insert? (NOTE: this appears slow, algorithm-wise, - # but it was compared against calendar queues, heaps, naive push/sort, - # and a bunch of other versions, and found to be fastest with a large - # variety of datasets.) - for (my $i = 0; $i < @Timers; $i++) { - if ($Timers[$i][0] > $fire_time) { - splice(@Timers, $i, 0, $timer); - return $timer; - } - } - - die "Shouldn't get here."; -} - -=head2 C<< CLASS->DescriptorMap() >> - -Get the hash of Danga::Socket objects keyed by the file descriptor (fileno) they -are wrapping. - -Returns a hash in list context or a hashref in scalar context. - -=cut -sub DescriptorMap { - return wantarray ? 
%DescriptorMap : \%DescriptorMap; -} -*descriptor_map = *DescriptorMap; -*get_sock_ref = *DescriptorMap; - -sub _InitPoller -{ - return if $DoneInit; - $DoneInit = 1; - - if ($HAVE_KQUEUE) { - $KQueue = IO::KQueue->new(); - $HaveKQueue = $KQueue >= 0; - if ($HaveKQueue) { - *EventLoop = *KQueueEventLoop; - } - } - elsif (Sys::Syscall::epoll_defined()) { - $Epoll = eval { epoll_create(1024); }; - $HaveEpoll = defined $Epoll && $Epoll >= 0; - if ($HaveEpoll) { - *EventLoop = *EpollEventLoop; - } - } - - if (!$HaveEpoll && !$HaveKQueue) { - require IO::Poll; - *EventLoop = *PollEventLoop; - } -} - -=head2 C<< CLASS->EventLoop() >> - -Start processing IO events. In most daemon programs this never exits. See -C below for how to exit the loop. - -=cut -sub FirstTimeEventLoop { - my $class = shift; - - _InitPoller(); - - if ($HaveEpoll) { - EpollEventLoop($class); - } elsif ($HaveKQueue) { - KQueueEventLoop($class); - } else { - PollEventLoop($class); - } -} - -## profiling-related data/functions -our ($Prof_utime0, $Prof_stime0); -sub _pre_profile { - ($Prof_utime0, $Prof_stime0) = getrusage(); -} - -sub _post_profile { - # get post information - my ($autime, $astime) = getrusage(); - - # calculate differences - my $utime = $autime - $Prof_utime0; - my $stime = $astime - $Prof_stime0; - - foreach my $k (@_) { - $Profiling{$k} ||= [ 0.0, 0.0, 0 ]; - $Profiling{$k}->[0] += $utime; - $Profiling{$k}->[1] += $stime; - $Profiling{$k}->[2]++; - } -} - -# runs timers and returns milliseconds for next one, or next event loop -sub RunTimers { - return $LoopTimeout unless @Timers; - - my $now = Time::HiRes::time(); - - # Run expired timers - while (@Timers && $Timers[0][0] <= $now) { - my $to_run = shift(@Timers); - $to_run->[1]->($now) if $to_run->[1]; - } - - return $LoopTimeout unless @Timers; - - # convert time to an even number of milliseconds, adding 1 - # extra, otherwise floating point fun can occur and we'll - # call RunTimers like 20-30 times, each returning a timeout - # 
of 0.0000212 seconds - my $timeout = int(($Timers[0][0] - $now) * 1000) + 1; - - # -1 is an infinite timeout, so prefer a real timeout - return $timeout if $LoopTimeout == -1; - - # otherwise pick the lower of our regular timeout and time until - # the next timer - return $LoopTimeout if $LoopTimeout < $timeout; - return $timeout; -} - -### The epoll-based event loop. Gets installed as EventLoop if IO::Epoll loads -### okay. -sub EpollEventLoop { - my $class = shift; - - foreach my $fd ( keys %OtherFds ) { - if (epoll_ctl($Epoll, EPOLL_CTL_ADD, $fd, EPOLLIN) == -1) { - warn "epoll_ctl(): failure adding fd=$fd; $! (", $!+0, ")\n"; - } - } - - while (1) { - my @events; - my $i; - my $timeout = RunTimers(); - - # get up to 1000 events - my $evcount = epoll_wait($Epoll, 1000, $timeout, \@events); - EVENT: - for ($i=0; $i<$evcount; $i++) { - my $ev = $events[$i]; - - # it's possible epoll_wait returned many events, including some at the end - # that ones in the front triggered unregister-interest actions. if we - # can't find the %sock entry, it's because we're no longer interested - # in that event. - my Danga::Socket $pob = $DescriptorMap{$ev->[0]}; - my $code; - my $state = $ev->[1]; - - # if we didn't find a Perlbal::Socket subclass for that fd, try other - # pseudo-registered (above) fds. - if (! $pob) { - if (my $code = $OtherFds{$ev->[0]}) { - $code->($state); - } else { - my $fd = $ev->[0]; - warn "epoll() returned fd $fd w/ state $state for which we have no mapping. removing.\n"; - POSIX::close($fd); - epoll_ctl($Epoll, EPOLL_CTL_DEL, $fd, 0); - } - next; - } - - DebugLevel >= 1 && $class->DebugMsg("Event: fd=%d (%s), state=%d \@ %s\n", - $ev->[0], ref($pob), $ev->[1], time); - - if ($DoProfile) { - my $class = ref $pob; - - # call profiling action on things that need to be done - if ($state & EPOLLIN && ! $pob->{closed}) { - _pre_profile(); - $pob->event_read; - _post_profile("$class-read"); - } - - if ($state & EPOLLOUT && ! 
$pob->{closed}) { - _pre_profile(); - $pob->event_write; - _post_profile("$class-write"); - } - - if ($state & (EPOLLERR|EPOLLHUP)) { - if ($state & EPOLLERR && ! $pob->{closed}) { - _pre_profile(); - $pob->event_err; - _post_profile("$class-err"); - } - if ($state & EPOLLHUP && ! $pob->{closed}) { - _pre_profile(); - $pob->event_hup; - _post_profile("$class-hup"); - } - } - - next; - } - - # standard non-profiling codepat - $pob->event_read if $state & EPOLLIN && ! $pob->{closed}; - $pob->event_write if $state & EPOLLOUT && ! $pob->{closed}; - if ($state & (EPOLLERR|EPOLLHUP)) { - $pob->event_err if $state & EPOLLERR && ! $pob->{closed}; - $pob->event_hup if $state & EPOLLHUP && ! $pob->{closed}; - } - } - return unless PostEventLoop(); - } - exit 0; -} - -### The fallback IO::Poll-based event loop. Gets installed as EventLoop if -### IO::Epoll fails to load. -sub PollEventLoop { - my $class = shift; - - my Danga::Socket $pob; - - while (1) { - my $timeout = RunTimers(); - - # the following sets up @poll as a series of ($poll,$event_mask) - # items, then uses IO::Poll::_poll, implemented in XS, which - # modifies the array in place with the even elements being - # replaced with the event masks that occured. 
- my @poll; - foreach my $fd ( keys %OtherFds ) { - push @poll, $fd, POLLIN; - } - while ( my ($fd, $sock) = each %DescriptorMap ) { - push @poll, $fd, $sock->{event_watch}; - } - - # if nothing to poll, either end immediately (if no timeout) - # or just keep calling the callback - unless (@poll) { - select undef, undef, undef, ($timeout / 1000); - return unless PostEventLoop(); - next; - } - - my $count = IO::Poll::_poll($timeout, @poll); - unless ($count) { - return unless PostEventLoop(); - next; - } - - # Fetch handles with read events - while (@poll) { - my ($fd, $state) = splice(@poll, 0, 2); - next unless $state; - - $pob = $DescriptorMap{$fd}; - - if (!$pob) { - if (my $code = $OtherFds{$fd}) { - $code->($state); - } - next; - } - - $pob->event_read if $state & POLLIN && ! $pob->{closed}; - $pob->event_write if $state & POLLOUT && ! $pob->{closed}; - $pob->event_err if $state & POLLERR && ! $pob->{closed}; - $pob->event_hup if $state & POLLHUP && ! $pob->{closed}; - } - - return unless PostEventLoop(); - } - - exit 0; -} - -### The kqueue-based event loop. Gets installed as EventLoop if IO::KQueue works -### okay. -sub KQueueEventLoop { - my $class = shift; - - foreach my $fd (keys %OtherFds) { - $KQueue->EV_SET($fd, IO::KQueue::EVFILT_READ(), IO::KQueue::EV_ADD()); - } - - while (1) { - my $timeout = RunTimers(); - my @ret = $KQueue->kevent($timeout); - - foreach my $kev (@ret) { - my ($fd, $filter, $flags, $fflags) = @$kev; - my Danga::Socket $pob = $DescriptorMap{$fd}; - if (!$pob) { - if (my $code = $OtherFds{$fd}) { - $code->($filter); - } else { - warn "kevent() returned fd $fd for which we have no mapping. 
removing.\n"; - POSIX::close($fd); # close deletes the kevent entry - } - next; - } - - DebugLevel >= 1 && $class->DebugMsg("Event: fd=%d (%s), flags=%d \@ %s\n", - $fd, ref($pob), $flags, time); - - $pob->event_read if $filter == IO::KQueue::EVFILT_READ() && !$pob->{closed}; - $pob->event_write if $filter == IO::KQueue::EVFILT_WRITE() && !$pob->{closed}; - if ($flags == IO::KQueue::EV_EOF() && !$pob->{closed}) { - if ($fflags) { - $pob->event_err; - } else { - $pob->event_hup; - } - } - } - return unless PostEventLoop(); - } - - exit(0); -} - -=head2 C<< CLASS->SetPostLoopCallback( CODEREF ) >> - -Sets post loop callback function. Pass a subref and it will be -called every time the event loop finishes. - -Return 1 (or any true value) from the sub to make the loop continue, 0 or false -and it will exit. - -The callback function will be passed two parameters: \%DescriptorMap, \%OtherFds. - -=cut -sub SetPostLoopCallback { - my ($class, $ref) = @_; - - if (ref $class) { - # per-object callback - my Danga::Socket $self = $class; - if (defined $ref && ref $ref eq 'CODE') { - $PLCMap{$self->{fd}} = $ref; - } else { - delete $PLCMap{$self->{fd}}; - } - } else { - # global callback - $PostLoopCallback = (defined $ref && ref $ref eq 'CODE') ? $ref : undef; - } -} - -# Internal function: run the post-event callback, send read events -# for pushed-back data, and close pending connections. returns 1 -# if event loop should continue, or 0 to shut it all down. -sub PostEventLoop { - # fire read events for objects with pushed-back read data - my $loop = 1; - while ($loop) { - $loop = 0; - foreach my $fd (keys %PushBackSet) { - my Danga::Socket $pob = $PushBackSet{$fd}; - - # a previous event_read invocation could've closed a - # connection that we already evaluated in "keys - # %PushBackSet", so skip ones that seem to have - # disappeared. this is expected. 
- next unless $pob; - - die "ASSERT: the $pob socket has no read_push_back" unless @{$pob->{read_push_back}}; - next unless (! $pob->{closed} && - $pob->{event_watch} & POLLIN); - $loop = 1; - $pob->event_read; - } - } - - # now we can close sockets that wanted to close during our event processing. - # (we didn't want to close them during the loop, as we didn't want fd numbers - # being reused and confused during the event loop) - while (my $sock = shift @ToClose) { - my $fd = fileno($sock); - - # close the socket. (not a Danga::Socket close) - $sock->close; - - # and now we can finally remove the fd from the map. see - # comment above in _cleanup. - delete $DescriptorMap{$fd}; - } - - - # by default we keep running, unless a postloop callback (either per-object - # or global) cancels it - my $keep_running = 1; - - # per-object post-loop-callbacks - for my $plc (values %PLCMap) { - $keep_running &&= $plc->(\%DescriptorMap, \%OtherFds); - } - - # now we're at the very end, call callback if defined - if (defined $PostLoopCallback) { - $keep_running &&= $PostLoopCallback->(\%DescriptorMap, \%OtherFds); - } - - return $keep_running; -} - -##################################################################### -### Danga::Socket-the-object code -##################################################################### - -=head2 OBJECT METHODS - -=head2 C<< CLASS->new( $socket ) >> - -Create a new Danga::Socket subclass object for the given I which will -react to events on it during the C. - -This is normally (always?) called from your subclass via: - - $class->SUPER::new($socket); - -=cut -sub new { - my Danga::Socket $self = shift; - $self = fields::new($self) unless ref $self; - - my $sock = shift; - - $self->{sock} = $sock; - my $fd = fileno($sock); - - Carp::cluck("undef sock and/or fd in Danga::Socket->new. sock=" . ($sock || "") . ", fd=" . 
($fd || "")) - unless $sock && $fd; - - $self->{fd} = $fd; - $self->{write_buf} = []; - $self->{write_buf_offset} = 0; - $self->{write_buf_size} = 0; - $self->{closed} = 0; - $self->{corked} = 0; - $self->{read_push_back} = []; - - $self->{event_watch} = POLLERR|POLLHUP|POLLNVAL; - - _InitPoller(); - - if ($HaveEpoll) { - epoll_ctl($Epoll, EPOLL_CTL_ADD, $fd, $self->{event_watch}) - and die "couldn't add epoll watch for $fd\n"; - } - elsif ($HaveKQueue) { - # Add them to the queue but disabled for now - $KQueue->EV_SET($fd, IO::KQueue::EVFILT_READ(), - IO::KQueue::EV_ADD() | IO::KQueue::EV_DISABLE()); - $KQueue->EV_SET($fd, IO::KQueue::EVFILT_WRITE(), - IO::KQueue::EV_ADD() | IO::KQueue::EV_DISABLE()); - } - - Carp::cluck("Danga::Socket::new blowing away existing descriptor map for fd=$fd ($DescriptorMap{$fd})") - if $DescriptorMap{$fd}; - - $DescriptorMap{$fd} = $self; - return $self; -} - - -##################################################################### -### I N S T A N C E M E T H O D S -##################################################################### - -=head2 C<< $obj->tcp_cork( $boolean ) >> - -Turn TCP_CORK on or off depending on the value of I. - -=cut -sub tcp_cork { - my Danga::Socket $self = $_[0]; - my $val = $_[1]; - - # make sure we have a socket - return unless $self->{sock}; - return if $val == $self->{corked}; - - my $rv; - if (TCP_CORK) { - $rv = setsockopt($self->{sock}, IPPROTO_TCP, TCP_CORK, - pack("l", $val ? 1 : 0)); - } else { - # FIXME: implement freebsd *PUSH sockopts - $rv = 1; - } - - # if we failed, close (if we're not already) and warn about the error - if ($rv) { - $self->{corked} = $val; - } else { - if ($! == EBADF || $! == ENOTSOCK) { - # internal state is probably corrupted; warn and then close if - # we're not closed already - warn "setsockopt: $!"; - $self->close('tcp_cork_failed'); - } elsif ($! 
== ENOPROTOOPT || $!{ENOTSOCK} || $!{EOPNOTSUPP}) { - # TCP implementation doesn't support corking, so just ignore it - # or we're trying to tcp-cork a non-socket (like a socketpair pipe - # which is acting like a socket, which Perlbal does for child - # processes acting like inetd-like web servers) - } else { - # some other error; we should never hit here, but if we do, die - die "setsockopt: $!"; - } - } -} - -=head2 C<< $obj->steal_socket() >> - -Basically returns our socket and makes it so that we don't try to close it, -but we do remove it from epoll handlers. THIS CLOSES $self. It is the same -thing as calling close, except it gives you the socket to use. - -=cut -sub steal_socket { - my Danga::Socket $self = $_[0]; - return if $self->{closed}; - - # cleanup does most of the work of closing this socket - $self->_cleanup(); - - # now undef our internal sock and fd structures so we don't use them - my $sock = $self->{sock}; - $self->{sock} = undef; - return $sock; -} - -=head2 C<< $obj->close( [$reason] ) >> - -Close the socket. The I argument will be used in debugging messages. - -=cut -sub close { - my Danga::Socket $self = $_[0]; - return if $self->{closed}; - - # print out debugging info for this close - if (DebugLevel) { - my ($pkg, $filename, $line) = caller; - my $reason = $_[1] || ""; - warn "Closing \#$self->{fd} due to $pkg/$filename/$line ($reason)\n"; - } - - # this does most of the work of closing us - $self->_cleanup(); - - # defer closing the actual socket until the event loop is done - # processing this round of events. (otherwise we might reuse fds) - if ($self->{sock}) { - push @ToClose, $self->{sock}; - $self->{sock} = undef; - } - - return 0; -} - -### METHOD: _cleanup() -### Called by our closers so we can clean internal data structures. 
-sub _cleanup { - my Danga::Socket $self = $_[0]; - - # we're effectively closed; we have no fd and sock when we leave here - $self->{closed} = 1; - - # we need to flush our write buffer, as there may - # be self-referential closures (sub { $client->close }) - # preventing the object from being destroyed - $self->{write_buf} = []; - - # uncork so any final data gets sent. only matters if the person closing - # us forgot to do it, but we do it to be safe. - $self->tcp_cork(0); - - # if we're using epoll, we have to remove this from our epoll fd so we stop getting - # notifications about it - if ($HaveEpoll && $self->{fd}) { - if (epoll_ctl($Epoll, EPOLL_CTL_DEL, $self->{fd}, $self->{event_watch}) != 0) { - # dump_error prints a backtrace so we can try to figure out why this happened - $self->dump_error("epoll_ctl(): failure deleting fd=$self->{fd} during _cleanup(); $! (" . ($!+0) . ")"); - } - } - - # now delete from mappings. this fd no longer belongs to us, so we don't want - # to get alerts for it if it becomes writable/readable/etc. - delete $PushBackSet{$self->{fd}}; - delete $PLCMap{$self->{fd}}; - - # we explicitly don't delete from DescriptorMap here until we - # actually close the socket, as we might be in the middle of - # processing an epoll_wait/etc that returned hundreds of fds, one - # of which is not yet processed and is what we're closing. if we - # keep it in DescriptorMap, then the event harnesses can just - # looked at $pob->{closed} and ignore it. but if it's an - # un-accounted for fd, then it (understandably) freak out a bit - # and emit warnings, thinking their state got off. - - # and finally get rid of our fd so we can't use it anywhere else - $self->{fd} = undef; -} - -=head2 C<< $obj->sock() >> - -Returns the underlying IO::Handle for the object. 
- -=cut -sub sock { - my Danga::Socket $self = shift; - return $self->{sock}; -} - -=head2 C<< $obj->set_writer_func( CODEREF ) >> - -Sets a function to use instead of C when writing data to the socket. - -=cut -sub set_writer_func { - my Danga::Socket $self = shift; - my $wtr = shift; - Carp::croak("Not a subref") unless !defined $wtr || UNIVERSAL::isa($wtr, "CODE"); - $self->{writer_func} = $wtr; -} - -=head2 C<< $obj->write( $data ) >> - -Write the specified data to the underlying handle. I may be scalar, -scalar ref, code ref (to run when there), or undef just to kick-start. -Returns 1 if writes all went through, or 0 if there are writes in queue. If -it returns 1, caller should stop waiting for 'writable' events) - -=cut -sub write { - my Danga::Socket $self; - my $data; - ($self, $data) = @_; - - # nobody should be writing to closed sockets, but caller code can - # do two writes within an event, have the first fail and - # disconnect the other side (whose destructor then closes the - # calling object, but it's still in a method), and then the - # now-dead object does its second write. that is this case. we - # just lie and say it worked. it'll be dead soon and won't be - # hurt by this lie. - return 1 if $self->{closed}; - - my $bref; - - # just queue data if there's already a wait - my $need_queue; - - if (defined $data) { - $bref = ref $data ? $data : \$data; - if ($self->{write_buf_size}) { - push @{$self->{write_buf}}, $bref; - $self->{write_buf_size} += ref $bref eq "SCALAR" ? length($$bref) : 1; - return 0; - } - - # this flag says we're bypassing the queue system, knowing we're the - # only outstanding write, and hoping we don't ever need to use it. 
- # if so later, though, we'll need to queue - $need_queue = 1; - } - - WRITE: - while (1) { - return 1 unless $bref ||= $self->{write_buf}[0]; - - my $len; - eval { - $len = length($$bref); # this will die if $bref is a code ref, caught below - }; - if ($@) { - if (UNIVERSAL::isa($bref, "CODE")) { - unless ($need_queue) { - $self->{write_buf_size}--; # code refs are worth 1 - shift @{$self->{write_buf}}; - } - $bref->(); - - # code refs are just run and never get reenqueued - # (they're one-shot), so turn off the flag indicating the - # outstanding data needs queueing. - $need_queue = 0; - - undef $bref; - next WRITE; - } - die "Write error: $@ <$bref>"; - } - - my $to_write = $len - $self->{write_buf_offset}; - my $written; - if (my $wtr = $self->{writer_func}) { - $written = $wtr->($bref, $to_write, $self->{write_buf_offset}); - } else { - $written = syswrite($self->{sock}, $$bref, $to_write, $self->{write_buf_offset}); - } - - if (! defined $written) { - if ($! == EPIPE) { - return $self->close("EPIPE"); - } elsif ($! == EAGAIN) { - # since connection has stuff to write, it should now be - # interested in pending writes: - if ($need_queue) { - push @{$self->{write_buf}}, $bref; - $self->{write_buf_size} += $len; - } - $self->{write_set_watch} = 1 unless $self->{event_watch} & POLLOUT; - $self->watch_write(1); - return 0; - } elsif ($! 
== ECONNRESET) { - return $self->close("ECONNRESET"); - } - - DebugLevel >= 1 && $self->debugmsg("Closing connection ($self) due to write error: $!\n"); - - return $self->close("write_error"); - } elsif ($written != $to_write) { - DebugLevel >= 2 && $self->debugmsg("Wrote PARTIAL %d bytes to %d", - $written, $self->{fd}); - if ($need_queue) { - push @{$self->{write_buf}}, $bref; - $self->{write_buf_size} += $len; - } - # since connection has stuff to write, it should now be - # interested in pending writes: - $self->{write_buf_offset} += $written; - $self->{write_buf_size} -= $written; - $self->on_incomplete_write; - return 0; - } elsif ($written == $to_write) { - DebugLevel >= 2 && $self->debugmsg("Wrote ALL %d bytes to %d (nq=%d)", - $written, $self->{fd}, $need_queue); - $self->{write_buf_offset} = 0; - - if ($self->{write_set_watch}) { - $self->watch_write(0); - $self->{write_set_watch} = 0; - } - - # this was our only write, so we can return immediately - # since we avoided incrementing the buffer size or - # putting it in the buffer. we also know there - # can't be anything else to write. - return 1 if $need_queue; - - $self->{write_buf_size} -= $written; - shift @{$self->{write_buf}}; - undef $bref; - next WRITE; - } - } -} - -sub on_incomplete_write { - my Danga::Socket $self = shift; - $self->{write_set_watch} = 1 unless $self->{event_watch} & POLLOUT; - $self->watch_write(1); -} - -=head2 C<< $obj->push_back_read( $buf ) >> - -Push back I (a scalar or scalarref) into the read stream. Useful if you read -more than you need to and want to return this data on the next "read". - -=cut -sub push_back_read { - my Danga::Socket $self = shift; - my $buf = shift; - push @{$self->{read_push_back}}, ref $buf ? $buf : \$buf; - $PushBackSet{$self->{fd}} = $self; -} - -=head2 C<< $obj->read( $bytecount ) >> - -Read at most I bytes from the underlying handle; returns scalar -ref on read, or undef on connection closed. 
- -=cut -sub read { - my Danga::Socket $self = shift; - return if $self->{closed}; - my $bytes = shift; - my $buf; - my $sock = $self->{sock}; - - if (@{$self->{read_push_back}}) { - $buf = shift @{$self->{read_push_back}}; - my $len = length($$buf); - - if ($len <= $bytes) { - delete $PushBackSet{$self->{fd}} unless @{$self->{read_push_back}}; - return $buf; - } else { - # if the pushed back read is too big, we have to split it - my $overflow = substr($$buf, $bytes); - $buf = substr($$buf, 0, $bytes); - unshift @{$self->{read_push_back}}, \$overflow; - return \$buf; - } - } - - # if this is too high, perl quits(!!). reports on mailing lists - # don't seem to point to a universal answer. 5MB worked for some, - # crashed for others. 1MB works for more people. let's go with 1MB - # for now. :/ - my $req_bytes = $bytes > 1048576 ? 1048576 : $bytes; - - my $res = sysread($sock, $buf, $req_bytes, 0); - DebugLevel >= 2 && $self->debugmsg("sysread = %d; \$! = %d", $res, $!); - - if (! $res && $! != EWOULDBLOCK) { - # catches 0=conn closed or undef=error - DebugLevel >= 2 && $self->debugmsg("Fd \#%d read hit the end of the road.", $self->{fd}); - return undef; - } - - return \$buf; -} - -=head2 (VIRTUAL) C<< $obj->event_read() >> - -Readable event handler. Concrete deriviatives of Danga::Socket should -provide an implementation of this. The default implementation will die if -called. - -=cut -sub event_read { die "Base class event_read called for $_[0]\n"; } - -=head2 (VIRTUAL) C<< $obj->event_err() >> - -Error event handler. Concrete deriviatives of Danga::Socket should -provide an implementation of this. The default implementation will die if -called. - -=cut -sub event_err { die "Base class event_err called for $_[0]\n"; } - -=head2 (VIRTUAL) C<< $obj->event_hup() >> - -'Hangup' event handler. Concrete deriviatives of Danga::Socket should -provide an implementation of this. The default implementation will die if -called. 
- -=cut -sub event_hup { die "Base class event_hup called for $_[0]\n"; } - -=head2 C<< $obj->event_write() >> - -Writable event handler. Concrete deriviatives of Danga::Socket may wish to -provide an implementation of this. The default implementation calls -C with an C. - -=cut -sub event_write { - my $self = shift; - $self->write(undef); -} - -=head2 C<< $obj->watch_read( $boolean ) >> - -Turn 'readable' event notification on or off. - -=cut -sub watch_read { - my Danga::Socket $self = shift; - return if $self->{closed} || !$self->{sock}; - - my $val = shift; - my $event = $self->{event_watch}; - - $event &= ~POLLIN if ! $val; - $event |= POLLIN if $val; - - # If it changed, set it - if ($event != $self->{event_watch}) { - if ($HaveKQueue) { - $KQueue->EV_SET($self->{fd}, IO::KQueue::EVFILT_READ(), - $val ? IO::KQueue::EV_ENABLE() : IO::KQueue::EV_DISABLE()); - } - elsif ($HaveEpoll) { - epoll_ctl($Epoll, EPOLL_CTL_MOD, $self->{fd}, $event) - and $self->dump_error("couldn't modify epoll settings for $self->{fd} " . - "from $self->{event_watch} -> $event: $! (" . ($!+0) . ")"); - } - $self->{event_watch} = $event; - } -} - -=head2 C<< $obj->watch_write( $boolean ) >> - -Turn 'writable' event notification on or off. - -=cut -sub watch_write { - my Danga::Socket $self = shift; - return if $self->{closed} || !$self->{sock}; - - my $val = shift; - my $event = $self->{event_watch}; - - $event &= ~POLLOUT if ! $val; - $event |= POLLOUT if $val; - - if ($val && caller ne __PACKAGE__) { - # A subclass registered interest, it's now responsible for this. - $self->{write_set_watch} = 0; - } - - # If it changed, set it - if ($event != $self->{event_watch}) { - if ($HaveKQueue) { - $KQueue->EV_SET($self->{fd}, IO::KQueue::EVFILT_WRITE(), - $val ? IO::KQueue::EV_ENABLE() : IO::KQueue::EV_DISABLE()); - } - elsif ($HaveEpoll) { - epoll_ctl($Epoll, EPOLL_CTL_MOD, $self->{fd}, $event) - and $self->dump_error("couldn't modify epoll settings for $self->{fd} " . 
- "from $self->{event_watch} -> $event: $! (" . ($!+0) . ")"); - } - $self->{event_watch} = $event; - } -} - -=head2 C<< $obj->dump_error( $message ) >> - -Prints to STDERR a backtrace with information about this socket and what lead -up to the dump_error call. - -=cut -sub dump_error { - my $i = 0; - my @list; - while (my ($file, $line, $sub) = (caller($i++))[1..3]) { - push @list, "\t$file:$line called $sub\n"; - } - - warn "ERROR: $_[1]\n" . - "\t$_[0] = " . $_[0]->as_string . "\n" . - join('', @list); -} - -=head2 C<< $obj->debugmsg( $format, @args ) >> - -Print the debugging message specified by the C-style I and -I. - -=cut -sub debugmsg { - my ( $self, $fmt, @args ) = @_; - confess "Not an object" unless ref $self; - - chomp $fmt; - printf STDERR ">>> $fmt\n", @args; -} - - -=head2 C<< $obj->peer_ip_string() >> - -Returns the string describing the peer's IP - -=cut -sub peer_ip_string { - my Danga::Socket $self = shift; - return _undef("peer_ip_string undef: no sock") unless $self->{sock}; - return $self->{peer_ip} if defined $self->{peer_ip}; - - my $pn = getpeername($self->{sock}); - return _undef("peer_ip_string undef: getpeername") unless $pn; - - my ($port, $iaddr) = eval { - if (length($pn) >= 28) { - return Socket6::unpack_sockaddr_in6($pn); - } else { - return Socket::sockaddr_in($pn); - } - }; - - if ($@) { - $self->{peer_port} = "[Unknown peerport '$@']"; - return "[Unknown peername '$@']"; - } - - $self->{peer_port} = $port; - - if (length($iaddr) == 4) { - return $self->{peer_ip} = Socket::inet_ntoa($iaddr); - } else { - $self->{peer_v6} = 1; - return $self->{peer_ip} = Socket6::inet_ntop(Socket6::AF_INET6(), - $iaddr); - } -} - -=head2 C<< $obj->peer_addr_string() >> - -Returns the string describing the peer for the socket which underlies this -object in form "ip:port" - -=cut -sub peer_addr_string { - my Danga::Socket $self = shift; - my $ip = $self->peer_ip_string - or return undef; - return $self->{peer_v6} ? 
- "[$ip]:$self->{peer_port}" : - "$ip:$self->{peer_port}"; -} - -=head2 C<< $obj->local_ip_string() >> - -Returns the string describing the local IP - -=cut -sub local_ip_string { - my Danga::Socket $self = shift; - return _undef("local_ip_string undef: no sock") unless $self->{sock}; - return $self->{local_ip} if defined $self->{local_ip}; - - my $pn = getsockname($self->{sock}); - return _undef("local_ip_string undef: getsockname") unless $pn; - - my ($port, $iaddr) = Socket::sockaddr_in($pn); - $self->{local_port} = $port; - - return $self->{local_ip} = Socket::inet_ntoa($iaddr); -} - -=head2 C<< $obj->local_addr_string() >> - -Returns the string describing the local end of the socket which underlies this -object in form "ip:port" - -=cut -sub local_addr_string { - my Danga::Socket $self = shift; - my $ip = $self->local_ip_string; - return $ip ? "$ip:$self->{local_port}" : undef; -} - - -=head2 C<< $obj->as_string() >> - -Returns a string describing this socket. - -=cut -sub as_string { - my Danga::Socket $self = shift; - my $rw = "(" . ($self->{event_watch} & POLLIN ? 'R' : '') . - ($self->{event_watch} & POLLOUT ? 'W' : '') . ")"; - my $ret = ref($self) . "$rw: " . ($self->{closed} ? "closed" : "open"); - my $peer = $self->peer_addr_string; - if ($peer) { - $ret .= " to " . $self->peer_addr_string; - } - return $ret; -} - -sub _undef { - return undef unless $ENV{DS_DEBUG}; - my $msg = shift || ""; - warn "Danga::Socket: $msg\n"; - return undef; -} - -package Danga::Socket::Timer; -# [$abs_float_firetime, $coderef]; -sub cancel { - $_[0][1] = undef; -} - -=head1 AUTHORS - -Brad Fitzpatrick - author - -Michael Granger - docs, testing - -Mark Smith - contributor, heavy user, testing - -Matt Sergeant - kqueue support, docs, timers, other bits - -=head1 BUGS - -Not documented enough (but isn't that true of every project?). - -tcp_cork only works on Linux for now. No BSD push/nopush support. 
- -=head1 LICENSE - -License is granted to use and distribute this module under the same -terms as Perl itself. - -=cut - -1; - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Net/Netmask.pm b/lib/mogdeps/Net/Netmask.pm deleted file mode 100644 index 89908e20..00000000 --- a/lib/mogdeps/Net/Netmask.pm +++ /dev/null @@ -1,583 +0,0 @@ - -package Net::Netmask; - -use vars qw($VERSION); -$VERSION = 1.9011; - -require Exporter; -@ISA = qw(Exporter); -@EXPORT = qw(findNetblock findOuterNetblock findAllNetblock - cidrs2contiglists range2cidrlist sort_by_ip_address - dumpNetworkTable sort_network_blocks cidrs2cidrs - cidrs2inverse); -@EXPORT_OK = (@EXPORT, qw(int2quad quad2int %quadmask2bits - %quadhostmask2bits imask sameblock cmpblocks contains)); - -my $remembered = {}; -my %imask2bits; -my %size2bits; -my @imask; - -# our %quadmask2bits; -# our %quadhostmask2bits; - -use vars qw($error $debug %quadmask2bits %quadhostmask2bits); -$debug = 1; - -use strict; -use warnings; -use Carp; -use overload - '""' => \&desc, - '<=>' => \&cmp_net_netmask_block, - 'cmp' => \&cmp_net_netmask_block, - 'fallback' => 1; - -sub new -{ - my ($package, $net, $mask) = @_; - - $mask = '' unless defined $mask; - - my $base; - my $bits; - my $ibase; - undef $error; - - if ($net =~ m,^(\d+\.\d+\.\d+\.\d+)/(\d+)$,) { - ($base, $bits) = ($1, $2); - } elsif ($net =~ m,^(\d+\.\d+\.\d+\.\d+)[:/](\d+\.\d+\.\d+\.\d+)$,) { - $base = $1; - my $quadmask = $2; - if (exists $quadmask2bits{$quadmask}) { - $bits = $quadmask2bits{$quadmask}; - } else { - $error = "illegal netmask: $quadmask"; - } - } elsif ($net =~ m,^(\d+\.\d+\.\d+\.\d+)[#](\d+\.\d+\.\d+\.\d+)$,) { - $base = $1; - my $hostmask = $2; - if (exists $quadhostmask2bits{$hostmask}) { - $bits = $quadhostmask2bits{$hostmask}; - } else { - $error = "illegal hostmask: $hostmask"; - } - } elsif (($net =~ m,^\d+\.\d+\.\d+\.\d+$,) - && ($mask =~ m,\d+\.\d+\.\d+\.\d+$,)) - { - $base = $net; - 
if (exists $quadmask2bits{$mask}) { - $bits = $quadmask2bits{$mask}; - } else { - $error = "illegal netmask: $mask"; - } - } elsif (($net =~ m,^\d+\.\d+\.\d+\.\d+$,) && - ($mask =~ m,0x[a-z0-9]+,i)) - { - $base = $net; - my $imask = hex($mask); - if (exists $imask2bits{$imask}) { - $bits = $imask2bits{$imask}; - } else { - $error = "illegal netmask: $mask ($imask)"; - } - } elsif ($net =~ /^\d+\.\d+\.\d+\.\d+$/ && ! $mask) { - ($base, $bits) = ($net, 32); - } elsif ($net =~ /^\d+\.\d+\.\d+$/ && ! $mask) { - ($base, $bits) = ("$net.0", 24); - } elsif ($net =~ /^\d+\.\d+$/ && ! $mask) { - ($base, $bits) = ("$net.0.0", 16); - } elsif ($net =~ /^\d+$/ && ! $mask) { - ($base, $bits) = ("$net.0.0.0", 8); - } elsif ($net =~ m,^(\d+\.\d+\.\d+)/(\d+)$,) { - ($base, $bits) = ("$1.0", $2); - } elsif ($net =~ m,^(\d+\.\d+)/(\d+)$,) { - ($base, $bits) = ("$1.0.0", $2); - } elsif ($net eq 'default' || $net eq 'any') { - ($base, $bits) = ("0.0.0.0", 0); - } elsif ($net =~ m,^(\d+\.\d+\.\d+\.\d+)\s*-\s*(\d+\.\d+\.\d+\.\d+)$,) { - # whois format - $ibase = quad2int($1); - my $end = quad2int($2); - $error = "illegal dotted quad: $net" - unless defined($ibase) && defined($end); - my $diff = ($end || 0) - ($ibase || 0) + 1; - $bits = $size2bits{$diff}; - $error = "could not find exact fit for $net" - if ! defined $error && ( - ! defined $bits - || ($ibase & ~$imask[$bits])); - } else { - $error = "could not parse $net"; - $error .= " $mask" if $mask; - } - - carp $error if $error && $debug; - - $ibase = quad2int($base || 0) unless defined $ibase; - unless (defined($ibase) || defined($error)) { - $error = "could not parse $net"; - $error .= " $mask" if $mask; - } - $ibase &= $imask[$bits] - if defined $ibase && defined $bits; - - $bits = 0 unless $bits; - if ($bits > 32) { - $error = "illegal number of bits: $bits" - unless $error; - $bits = 32; - } - - return bless { - 'IBASE' => $ibase, - 'BITS' => $bits, - ( $error ? 
( 'ERROR' => $error ) : () ), - }; -} - -sub new2 -{ - local($debug) = 0; - my $net = new(@_); - return undef if $error; - return $net; -} - -sub errstr { return $error; } -sub debug { my $this = shift; return (@_ ? $debug = shift : $debug) } - -sub base { my ($this) = @_; return int2quad($this->{'IBASE'}); } -sub bits { my ($this) = @_; return $this->{'BITS'}; } -sub size { my ($this) = @_; return 2**(32- $this->{'BITS'}); } -sub next { my ($this) = @_; int2quad($this->{'IBASE'} + $this->size()); } - -sub broadcast -{ - my($this) = @_; - int2quad($this->{'IBASE'} + $this->size() - 1); -} - -*first = \&base; -*last = \&broadcast; - -sub desc -{ - return int2quad($_[0]->{'IBASE'}).'/'.$_[0]->{'BITS'}; -} - -sub imask -{ - return (2**32 -(2** (32- $_[0]))); -} - -sub mask -{ - my ($this) = @_; - - return int2quad ( $imask[$this->{'BITS'}]); -} - -sub hostmask -{ - my ($this) = @_; - - return int2quad ( ~ $imask[$this->{'BITS'}]); -} - -sub nth -{ - my ($this, $index, $bitstep) = @_; - my $size = $this->size(); - my $ibase = $this->{'IBASE'}; - $bitstep = 32 unless $bitstep; - my $increment = 2**(32-$bitstep); - $index *= $increment; - $index += $size if $index < 0; - return undef if $index < 0; - return undef if $index >= $size; - return int2quad($ibase+$index); -} - -sub enumerate -{ - my ($this, $bitstep) = @_; - $bitstep = 32 unless $bitstep; - my $size = $this->size(); - my $increment = 2**(32-$bitstep); - my @ary; - my $ibase = $this->{'IBASE'}; - for (my $i = 0; $i < $size; $i += $increment) { - push(@ary, int2quad($ibase+$i)); - } - return @ary; -} - -sub inaddr -{ - my ($this) = @_; - my $ibase = $this->{'IBASE'}; - my $blocks = int($this->size()/256); - return (join('.',unpack('xC3', pack('V', $ibase))).".in-addr.arpa", - $ibase%256, $ibase%256+$this->size()-1) if $blocks == 0; - my @ary; - for (my $i = 0; $i < $blocks; $i++) { - push(@ary, join('.',unpack('xC3', pack('V', $ibase+$i*256))) - .".in-addr.arpa", 0, 255); - } - return @ary; -} - -sub tag -{ - my 
$this = shift; - my $tag = shift; - my $val = $this->{'T'.$tag}; - $this->{'T'.$tag} = $_[0] if @_; - return $val; -} - -sub quad2int -{ - my @bytes = split(/\./,$_[0]); - - return undef unless @bytes == 4 && ! grep {!(/\d+$/ && $_<256)} @bytes; - - return unpack("N",pack("C4",@bytes)); -} - -sub int2quad -{ - return join('.',unpack('C4', pack("N", $_[0]))); -} - -sub storeNetblock -{ - my ($this, $t) = @_; - $t = $remembered unless $t; - - my $base = $this->{'IBASE'}; - - $t->{$base} = [] unless exists $t->{$base}; - - my $mb = maxblock($this); - my $b = $this->{'BITS'}; - my $i = $b - $mb; - - $t->{$base}->[$i] = $this; -} - -sub deleteNetblock -{ - my ($this, $t) = @_; - $t = $remembered unless $t; - - my $base = $this->{'IBASE'}; - - my $mb = maxblock($this); - my $b = $this->{'BITS'}; - my $i = $b - $mb; - - return unless defined $t->{$base}; - - undef $t->{$base}->[$i]; - - for my $x (@{$t->{$base}}) { - return if $x; - } - delete $t->{$base}; -} - -sub findNetblock -{ - my ($ipquad, $t) = @_; - $t = $remembered unless $t; - - my $ip = quad2int($ipquad); - my %done; - - for (my $b = 32; $b >= 0; $b--) { - my $nb = $ip & $imask[$b]; - next unless exists $t->{$nb}; - my $mb = imaxblock($nb, 32); - next if $done{$mb}++; - my $i = $b - $mb; - confess "$mb, $b, $ipquad, $nb" if ($i < 0 or $i > 32); - while ($i >= 0) { - return $t->{$nb}->[$i] - if defined $t->{$nb}->[$i]; - $i--; - } - } -} - -sub findOuterNetblock -{ - my ($ipquad, $t) = @_; - $t = $remembered unless $t; - - my $ip; - my $mask; - if (ref($ipquad)) { - $ip = $ipquad->{IBASE}; - $mask = $ipquad->{BITS}; - } else { - $ip = quad2int($ipquad); - $mask = 32; - } - - for (my $b = 0; $b <= $mask; $b++) { - my $nb = $ip & $imask[$b];; - next unless exists $t->{$nb}; - my $mb = imaxblock($nb, $mask); - my $i = $b - $mb; - confess "$mb, $b, $ipquad, $nb" if $i < 0; - confess "$mb, $b, $ipquad, $nb" if $i > 32; - while ($i >= 0) { - return $t->{$nb}->[$i] - if defined $t->{$nb}->[$i]; - $i--; - } - } -} - 
-sub findAllNetblock -{ - my ($ipquad, $t) = @_; - $t = $remembered unless $t; - my @ary ; - my $ip = quad2int($ipquad); - my %done; - - for (my $b = 32; $b >= 0; $b--) { - my $nb = $ip & $imask[$b]; - next unless exists $t->{$nb}; - my $mb = imaxblock($nb, 32); - next if $done{$mb}++; - my $i = $b - $mb; - confess "$mb, $b, $ipquad, $nb" if $i < 0; - confess "$mb, $b, $ipquad, $nb" if $i > 32; - while ($i >= 0) { - push(@ary, $t->{$nb}->[$i]) - if defined $t->{$nb}->[$i]; - $i--; - } - } - return @ary; -} - -sub dumpNetworkTable -{ - my ($t) = @_; - $t = $remembered unless $t; - - my @ary; - foreach my $base (keys %$t) { - push(@ary, grep (defined($_), @{$t->{base}})); - for my $x (@{$t->{$base}}) { - push(@ary, $x) - if defined $x; - } - } - return sort @ary; -} - -sub checkNetblock -{ - my ($this, $t) = @_; - $t = $remembered unless $t; - - my $base = $this->{'IBASE'}; - - my $mb = maxblock($this); - my $b = $this->{'BITS'}; - my $i = $b - $mb; - - return defined $t->{$base}->[$i]; -} - -sub match -{ - my ($this, $ip) = @_; - my $i = quad2int($ip); - my $imask = $imask[$this->{BITS}]; - if (($i & $imask) == $this->{IBASE}) { - return (($i & ~ $imask) || "0 "); - } else { - return 0; - } -} - -sub maxblock -{ - my ($this) = @_; - return imaxblock($this->{'IBASE'}, $this->{'BITS'}); -} - -sub imaxblock -{ - my ($ibase, $tbit) = @_; - confess unless defined $ibase; - while ($tbit > 0) { - my $im = $imask[$tbit-1]; - last if (($ibase & $im) != $ibase); - $tbit--; - } - return $tbit; -} - -sub range2cidrlist -{ - my ($startip, $endip) = @_; - - my $start = quad2int($startip); - my $end = quad2int($endip); - - ($start, $end) = ($end, $start) - if $start > $end; - return irange2cidrlist($start, $end); -} - -sub irange2cidrlist -{ - my ($start, $end) = @_; - my @result; - while ($end >= $start) { - my $maxsize = imaxblock($start, 32); - my $maxdiff = 32 - int(log($end - $start + 1)/log(2)); - $maxsize = $maxdiff if $maxsize < $maxdiff; - push (@result, bless { - 'IBASE' 
=> $start, - 'BITS' => $maxsize - }); - $start += 2**(32-$maxsize); - } - return @result; -} - -sub cidrs2contiglists -{ - my (@cidrs) = sort_network_blocks(@_); - my @result; - while (@cidrs) { - my (@r) = shift(@cidrs); - my $max = $r[0]->{IBASE} + $r[0]->size; - while ($cidrs[0] && $cidrs[0]->{IBASE} <= $max) { - my $nm = $cidrs[0]->{IBASE} + $cidrs[0]->size; - $max = $nm if $nm > $max; - push(@r, shift(@cidrs)); - } - push(@result, [@r]); - } - return @result; -} - -sub cidrs2cidrs -{ - my (@cidrs) = sort_network_blocks(@_); - my @result; - while (@cidrs) { - my (@r) = shift(@cidrs); - my $max = $r[0]->{IBASE} + $r[0]->size; - while ($cidrs[0] && $cidrs[0]->{IBASE} <= $max) { - my $nm = $cidrs[0]->{IBASE} + $cidrs[0]->size; - $max = $nm if $nm > $max; - push(@r, shift(@cidrs)); - } - my $start = $r[0]->{IBASE}; - my $end = $max - 1; - push(@result, irange2cidrlist($start, $end)); - } - return @result; -} - -sub cidrs2inverse -{ - my $outer = shift; - $outer = __PACKAGE__->new($outer) unless ref($outer); - my (@cidrs) = cidrs2cidrs(@_); - my $first = $outer->{IBASE}; - my $last = $first + $outer->size() -1; - shift(@cidrs) while $cidrs[0] && $cidrs[0]->{IBASE} + $cidrs[0]->size < $first; - my @r; - while (@cidrs && $first < $last) { - if ($first < $cidrs[0]->{IBASE}) { - if ($last <= $cidrs[0]->{IBASE}-1) { - return (@r, irange2cidrlist($first, $last)); - } - push(@r, irange2cidrlist($first, $cidrs[0]->{IBASE}-1)); - } - last if $cidrs[0]->{IBASE} > $last; - $first = $cidrs[0]->{IBASE} + $cidrs[0]->size; - shift(@cidrs); - } - if ($first < $last) { - push(@r, irange2cidrlist($first, $last)); - } - return @r; -} - -sub by_net_netmask_block -{ - $a->{'IBASE'} <=> $b->{'IBASE'} - || $a->{'BITS'} <=> $b->{'BITS'}; -} - -sub sameblock -{ - return ! cmpblocks(@_); -} - -sub cmpblocks -{ - my $this = shift; - my $class = ref $this; - my $other = (ref $_[0]) ? 
shift : $class->new(@_); - return cmp_net_netmask_block($this, $other); -} - -sub contains -{ - my $this = shift; - my $class = ref $this; - my $other = (ref $_[0]) ? shift : $class->new(@_); - return 0 if $this->{IBASE} > $other->{IBASE}; - return 0 if $this->{BITS} > $other->{BITS}; - return 0 if $other->{IBASE} > $this->{IBASE} + $this->size -1; - return 1; -} - -sub cmp_net_netmask_block -{ - return ($_[0]->{IBASE} <=> $_[1]->{IBASE} - || $_[0]->{BITS} <=> $_[1]->{BITS}); -} - -sub sort_network_blocks -{ - return - map $_->[0], - sort { $a->[1] <=> $b->[1] || $a->[2] <=> $b->[2] } - map [ $_, $_->{IBASE}, $_->{BITS} ], @_; - -} - -sub sort_by_ip_address -{ - return - map $_->[0], - sort { $a->[1] cmp $b->[1] } - map [ $_, pack("C4",split(/\./,$_)) ], @_; - -} - -BEGIN { - for (my $i = 0; $i <= 32; $i++) { - $imask[$i] = imask($i); - $imask2bits{$imask[$i]} = $i; - $quadmask2bits{int2quad($imask[$i])} = $i; - $quadhostmask2bits{int2quad(~$imask[$i])} = $i; - $size2bits{ 2**(32-$i) } = $i; - } -} -1; diff --git a/lib/mogdeps/Perlbal.pm b/lib/mogdeps/Perlbal.pm deleted file mode 100644 index eceabb26..00000000 --- a/lib/mogdeps/Perlbal.pm +++ /dev/null @@ -1,1377 +0,0 @@ -#!/usr/bin/perl -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. -# - -=head1 NAME - -Perlbal - Reverse-proxy load balancer and webserver - -=head1 SEE ALSO - - http://www.danga.com/perlbal/ - -=head1 COPYRIGHT AND LICENSE - -Copyright 2004, Danga Interactive, Inc. -Copyright 2005-2007, Six Apart, Ltd. - -You can use and redistribute Perlbal under the same terms as Perl itself. 
- -=cut - -package Perlbal; - -BEGIN { - # keep track of anonymous subs' origins: - $^P |= 0x200; -} - -my $has_gladiator = eval "use Devel::Gladiator; 1;"; -my $has_cycle = eval "use Devel::Cycle; 1;"; -use Devel::Peek; - -use vars qw($VERSION); -$VERSION = '1.73'; - -use constant DEBUG => $ENV{PERLBAL_DEBUG} || 0; -use constant DEBUG_OBJ => $ENV{PERLBAL_DEBUG_OBJ} || 0; -use constant TRACK_STATES => $ENV{PERLBAL_TRACK_STATES} || 0; # if on, track states for "state changes" command - -use strict; -use warnings; -no warnings qw(deprecated); - -use Storable (); -use IO::Socket; -use IO::Handle; -use IO::File; - -$Perlbal::SYSLOG_AVAILABLE = eval { require Sys::Syslog; 1; }; -$Perlbal::BSD_RESOURCE_AVAILABLE = eval { require BSD::Resource; 1; }; - -# incremented every second by a timer: -$Perlbal::tick_time = time(); - -# Set to 1 when we open syslog, and 0 when we close it -$Perlbal::syslog_open = 0; - -use Getopt::Long; -use Carp qw(cluck croak); -use Errno qw(EBADF); -use POSIX (); - -our(%TrackVar); -sub track_var { - my ($name, $ref) = @_; - $TrackVar{$name} = $ref; -} - -use Perlbal::AIO; -use Perlbal::HTTPHeaders; -use Perlbal::Service; -use Perlbal::Socket; -use Perlbal::TCPListener; -use Perlbal::UploadListener; -use Perlbal::ClientManage; -use Perlbal::ClientHTTPBase; -use Perlbal::ClientProxy; -use Perlbal::ClientHTTP; -use Perlbal::BackendHTTP; -use Perlbal::ReproxyManager; -use Perlbal::Pool; -use Perlbal::ManageCommand; -use Perlbal::CommandContext; -use Perlbal::Util; - -$SIG{'PIPE'} = "IGNORE"; # handled manually - -our(%hooks); # hookname => subref -our(%service); # servicename -> Perlbal::Service -our(%pool); # poolname => Perlbal::Pool -our(%plugins); # plugin => 1 (shows loaded plugins) -our($last_error); -our $service_autonumber = 1; # used to generate names for anonymous services created with Perlbal->create_service() -our $vivify_pools = 1; # if on, allow automatic creation of pools -our $foreground = 1; # default to foreground -our $track_obj 
= 0; # default to not track creation locations -our $reqs = 0; # total number of requests we've done -our $starttime = time(); # time we started -our $pidfile = ''; # full path, default to not writing pidfile -# used by pidfile (only makes sense before run started) -# don't rely on this variable, it might change. -our $run_started = 0; -our ($lastutime, $laststime, $lastreqs) = (0, 0, 0); # for deltas - -our %PluginCase = (); # lowercase plugin name -> as file is named - -# setup XS status data structures -our %XSModules; # ( 'headers' => 'Perlbal::XS::HTTPHeaders' ) - -# now include XS files -eval "use Perlbal::XS::HTTPHeaders;"; # if we have it, load it - -# activate modules as necessary -if ($ENV{PERLBAL_XS_HEADERS} && $XSModules{headers}) { - Perlbal::XS::HTTPHeaders::enable(); -} - -# setup a USR1 signal handler that tells us to dump some basic statistics -# of how we're doing to the syslog -$SIG{'USR1'} = sub { - my $dumper = sub { Perlbal::log('info', $_[0]); }; - foreach my $svc (values %service) { - run_manage_command("show service $svc->{name}", $dumper); - } - run_manage_command('states', $dumper); - run_manage_command('queues', $dumper); -}; - -sub error { - $last_error = shift; - return 0; -} - -# Object instance counts, for debugging and leak detection -our(%ObjCount); # classname -> instances -our(%ObjTotal); # classname -> instances -our(%ObjTrack); # "$objref" -> creation location -sub objctor { - if (DEBUG_OBJ) { - my $ref = ref $_[0]; - $ref .= "-$_[1]" if $_[1]; - $ObjCount{$ref}++; - $ObjTotal{$ref}++; - - # now, if we're tracing leaks, note this object's creation location - if ($track_obj) { - my $i = 1; - my @list; - while (my $sub = (caller($i++))[3]) { - push @list, $sub; - } - $ObjTrack{"$_[0]"} = [ time, join(', ', @list) ]; - } - } -} -sub objdtor { - if (DEBUG_OBJ) { - my $ref = ref $_[0]; - $ref .= "-$_[1]" if $_[1]; - $ObjCount{$ref}--; - - # remove tracking for this object - if ($track_obj) { - delete $ObjTrack{"$_[0]"}; - } - } -} - 
-sub register_global_hook { - $hooks{$_[0]} = $_[1]; - return 1; -} - -sub unregister_global_hook { - delete $hooks{$_[0]}; - return 1; -} - -sub run_global_hook { - my $hookname = shift; - my $ref = $hooks{$hookname}; - return $ref->(@_) if defined $ref; # @_ is $mc (a Perlbal::ManageCommand) - return undef; -} - -sub service_names { - return sort keys %service; -} - -# class method: given a service name, returns a service object -sub service { - my $class = shift; - return $service{$_[0]}; -} - -sub create_service { - my $class = shift; - my $name = shift; - - unless (defined($name)) { - $name = "____auto_".($service_autonumber++); - } - - croak("service '$name' already exists") if $service{$name}; - croak("pool '$name' already exists") if $pool{$name}; - - # Create the new service and return it - return $service{$name} = Perlbal::Service->new($name); -} - -sub pool { - my $class = shift; - return $pool{$_[0]}; -} - -# given some plugin name, return its correct case -sub plugin_case { - my $pname = lc shift; - return $PluginCase{$pname} || $pname; -} - -# run a block of commands. returns true if they all passed -sub run_manage_commands { - my ($cmd_block, $out, $ctx) = @_; - - $ctx ||= Perlbal::CommandContext->new; - foreach my $cmd (split(/\n/, $cmd_block)) { - return 0 unless Perlbal::run_manage_command($cmd, $out, $ctx); - } - return 1; -} - -# allows ${ip:eth0} in config. 
currently the only supported expansion -sub _expand_config_var { - my $cmd = shift; - $cmd =~ /^(\w+):(.+)/ - or die "Unknown config variable: $cmd\n"; - my ($type, $val) = ($1, $2); - if ($type eq "ip") { - die "Bogus-looking iface name" unless $val =~ /^\w+$/; - my $conf = `/sbin/ifconfig $val`; - $conf =~ /inet addr:(\S+)/ - or die "Can't find IP of interface '$val'"; - return $1; - } - die "Unknown config variable type: $type\n"; -} - -# returns 1 if command succeeded, 0 otherwise -sub run_manage_command { - my ($cmd, $out, $ctx) = @_; # $out is output stream closure - - $cmd =~ s/\#.*//; - $cmd =~ s/^\s+//; - $cmd =~ s/\s+$//; - $cmd =~ s/\s+/ /g; - - my $orig = $cmd; # save original case for some commands - $cmd =~ s/^([^=]+)/lc $1/e; # lowercase everything up to an = - return 1 unless $cmd =~ /^\S/; - - # expand variables - $cmd =~ s/\$\{(.+?)\}/_expand_config_var($1)/eg; - $cmd =~ s/\$(\w+)/$ENV{$1}/g; - - $out ||= sub {}; - $ctx ||= Perlbal::CommandContext->new; - - my $err = sub { - $out->("ERROR: $_[0]"); - return 0; - }; - my $ok = sub { - $out->("OK") if $ctx->verbose; - return 1; - }; - - return $err->("invalid command") unless $cmd =~ /^(\w+)/; - my $basecmd = $1; - - my $mc = Perlbal::ManageCommand->new($basecmd, $cmd, $out, $ok, $err, $orig, $ctx); - - # for testing auto crashing and recovery: - if ($basecmd eq "crash") { die "Intentional crash." 
}; - - no strict 'refs'; - my $handler; - if ($Perlbal::{"MANAGE_$basecmd"} && ($handler = *{"MANAGE_$basecmd"}{CODE})) { - my $rv = eval { $handler->($mc); }; - return $mc->err($@) if $@; - return $rv; - } - - # if no handler found, look for plugins - - # call any hooks if they've been defined - my $rval = eval { run_global_hook("manage_command.$basecmd", $mc); }; - return $mc->err($@) if $@; - if (defined $rval) { - # commands may return boolean, or arrayref to mass-print - if (ref $rval eq "ARRAY") { - $mc->out($_) foreach @$rval; - return 1; - } - return $rval; - } - - return $mc->err("unknown command: $basecmd"); -} - -sub arena_ref_counts { - my $all = Devel::Gladiator::walk_arena(); - my %ct; - - my %run_cycle; - foreach my $it (@$all) { - $ct{ref $it}++; - if (ref $it eq "CODE") { - my $name = Devel::Peek::CvGV($it); - $ct{$name}++ if $name =~ /ANON/; - } - } - $all = undef; - return \%ct; -} - -my %last_gladiator; -sub MANAGE_gladiator { - my $mc = shift->no_opts; - unless ($has_gladiator) { - $mc->end; - return; - } - - my $ct = arena_ref_counts(); - my $ret; - $ret .= "ARENA COUNTS:\n"; - foreach my $k (sort {$ct->{$b} <=> $ct->{$a}} keys %$ct) { - my $delta = $ct->{$k} - ($last_gladiator{$k} || 0); - $last_gladiator{$k} = $ct->{$k}; - next unless $ct->{$k} > 1; - $ret .= sprintf(" %4d %-4d $k\n", $ct->{$k}, $delta); - } - - $mc->out($ret); - $mc->end; -} - -sub MANAGE_varsize { - my $mc = shift->no_opts; - - my $emit; - $emit = sub { - my ($v, $depth, $name) = @_; - $name ||= ""; - - my $show; - if (ref $v eq "ARRAY") { - return unless @$v; - $show = "[] " . scalar @$v; - } - elsif (ref $v eq "HASH") { - return unless %$v; - $show = "{} " . 
scalar keys %$v; - } - else { - $show = " = $v"; - } - my $pre = " " x $depth; - $mc->out("$pre$name $show"); - - if (ref $v eq "HASH") { - foreach my $k (sort keys %$v) { - $emit->($v->{$k}, $depth+1, "{$k}"); - } - } - }; - - foreach my $k (sort keys %TrackVar) { - my $v = $TrackVar{$k} or next; - $emit->($v, 0, $k); - } - - $mc->end; -} - -sub MANAGE_obj { - my $mc = shift->no_opts; - - foreach (sort keys %ObjCount) { - $mc->out("$_ = $ObjCount{$_} (tot=$ObjTotal{$_})"); - } - $mc->end; -} - -sub MANAGE_verbose { - my $mc = shift->parse(qr/^verbose (on|off)$/, - "usage: VERBOSE {on|off}"); - my $onoff = $mc->arg(1); - $mc->{ctx}->verbose(lc $onoff eq 'on' ? 1 : 0); - return $mc->ok; -} - -sub MANAGE_shutdown { - my $mc = shift->parse(qr/^shutdown(\s?graceful)?\s?(\d+)?$/); - - # immediate shutdown - exit(0) unless $mc->arg(1); - - # set connect ahead to 0 for all services so they don't spawn extra backends - foreach my $svc (values %service) { - $svc->{connect_ahead} = 0; - } - - # tell all sockets we're doing a graceful stop - my $sf = Perlbal::Socket->get_sock_ref; - foreach my $k (keys %$sf) { - my Perlbal::Socket $v = $sf->{$k}; - $v->die_gracefully if $v->can("die_gracefully"); - } - - # register a post loop callback that will end the event loop when we only have - # a single socket left, the AIO socket - Perlbal::Socket->SetPostLoopCallback(sub { - my ($descmap, $otherfds) = @_; - - # Ghetto: duplicate the code we already had for our postloopcallback - Perlbal::Socket::run_callbacks(); - - # see what we have here; make sure we have no Clients and no unbored Backends - foreach my $sock (values %$descmap) { - my $ref = ref $sock; - return 1 if $ref =~ /^Perlbal::Client/ && $ref ne 'Perlbal::ClientManage'; - return 1 if $sock->isa('Perlbal::BackendHTTP') && $sock->{state} ne 'bored'; - } - return 0; # end the event loop and thus we exit perlbal - }); - - # If requested, register a callback to kill the perlbal process after a specified number of seconds - if 
(my $timeout = $mc->arg(2)) { - Perlbal::Socket::register_callback($timeout, sub { exit(0); }); - } - - # so they know something happened - return $mc->ok; -} - -sub MANAGE_mime { - my $mc = shift->parse(qr/^mime(?:\s+(\w+)(?:\s+(\w+))?(?:\s+(\S+))?)?$/); - my ($cmd, $arg1, $arg2) = ($mc->arg(1), $mc->arg(2), $mc->arg(3)); - - if (!$cmd || $cmd eq 'list') { - foreach my $key (sort keys %$Perlbal::ClientHTTPBase::MimeType) { - $mc->out("$key $Perlbal::ClientHTTPBase::MimeType->{$key}"); - } - $mc->end; - } elsif ($cmd eq 'set') { - if (!$arg1 || !$arg2) { - return $mc->err("Usage: set "); - } - - $Perlbal::ClientHTTPBase::MimeType->{$arg1} = $arg2; - return $mc->out("$arg1 set to $arg2."); - } elsif ($cmd eq 'remove') { - if (delete $Perlbal::ClientHTTPBase::MimeType->{$arg1}) { - return $mc->out("$arg1 removed."); - } else { - return $mc->err("$arg1 not a defined extension."); - } - } else { - return $mc->err("Usage: list, remove , add "); - } -} - -sub MANAGE_xs { - my $mc = shift->parse(qr/^xs(?:\s+(\w+)\s+(\w+))?$/); - my ($cmd, $module) = ($mc->arg(1), $mc->arg(2)); - - if ($cmd) { - # command? verify - return $mc->err('Known XS modules: ' . join(', ', sort keys %XSModules) . '.') - unless $XSModules{$module}; - - # okay, so now enable or disable this module - if ($cmd eq 'enable') { - my $res = eval "return $XSModules{$module}::enable();"; - return $mc->err("Unable to enable module.") - unless $res; - return $mc->ok; - } elsif ($cmd eq 'disable') { - my $res = eval "return $XSModules{$module}::disable();"; - return $mc->err("Unable to disable module.") - unless $res; - return $mc->out("Module disabled."); - } else { - return $mc->err('Usage: xs [ ]'); - } - } else { - # no commands, so just check status - $mc->out('XS module status:', ''); - foreach my $module (sort keys %XSModules) { - my $class = $XSModules{$module}; - my $enabled = eval "return \$${class}::Enabled;"; - my $status = defined $enabled ? ($enabled ? 
"installed, enabled" : - "installed, disabled") : "not installed"; - $mc->out(" $module: $status"); - } - $mc->out(' No modules available.') unless %XSModules; - $mc->out(''); - $mc->out("To enable a module: xs enable "); - $mc->out("To disable a module: xs disable "); - } - $mc->end; -} - -sub MANAGE_fd { - my $mc = shift->no_opts; - return $mc->err('This command is not available unless BSD::Resource is installed') unless $Perlbal::BSD_RESOURCE_AVAILABLE; - - # called in list context on purpose, but we want the hard limit - my (undef, $max) = BSD::Resource::getrlimit(BSD::Resource::RLIMIT_NOFILE()); - my $ct = 0; - - # first try procfs if one exists, as that's faster than iterating - if (opendir(DIR, "/proc/self/fd")) { - my @dirs = readdir(DIR); - $ct = scalar(@dirs) - 2; # don't count . and .. - closedir(DIR); - } else { - # isatty() is cheap enough to do on everything - foreach (0..$max) { - my $res = POSIX::isatty($_); - $ct++ if $res || ($! != EBADF); - } - } - $mc->out("max $max"); - $mc->out("cur $ct"); - $mc->end; -} - -sub MANAGE_proc { - my $mc = shift->no_opts; - - $mc->out('time: ' . time()); - $mc->out('pid: ' . $$); - - - if ($Perlbal::BSD_RESOURCE_AVAILABLE) { - my $ru = BSD::Resource::getrusage(); - my ($ut, $st) = ($ru->utime, $ru->stime); - my ($udelta, $sdelta) = ($ut - $lastutime, $st - $laststime); - $mc->out("utime: $ut (+$udelta)"); - $mc->out("stime: $st (+$sdelta)"); - ($lastutime, $laststime, $lastreqs) = ($ut, $st, $reqs); - } - - my $rdelta = $reqs - $lastreqs; - $mc->out("reqs: $reqs (+$rdelta)"); - $lastreqs = $reqs; - - $mc->end; -} - -sub MANAGE_nodes { - my $mc = shift->parse(qr/^nodes?(?:\s+(\d+.\d+.\d+.\d+)(?::(\d+))?)?$/); - - my ($ip, $port) = ($mc->arg(1), $mc->arg(2) || 80); - my $spec_ipport = $ip ? 
"$ip:$port" : undef; - my $ref = \%Perlbal::BackendHTTP::NodeStats; - - my $dump = sub { - my $ipport = shift; - foreach my $key (keys %{$ref->{$ipport}}) { - if (ref $ref->{$ipport}->{$key} eq 'ARRAY') { - my %temp; - $temp{$_}++ foreach @{$ref->{$ipport}->{$key}}; - foreach my $tkey (keys %temp) { - $mc->out("$ipport $key $tkey $temp{$tkey}"); - } - } else { - $mc->out("$ipport $key $ref->{$ipport}->{$key}"); - } - } - }; - - # dump a node, or all nodes - if ($spec_ipport) { - $dump->($spec_ipport); - } else { - foreach my $ipport (keys %$ref) { - $dump->($ipport); - } - } - - $mc->end; -} - -# singular also works for the nodes command -*MANAGE_node = \&MANAGE_nodes; - -sub MANAGE_prof { - my $mc = shift->parse(qr/^prof\w*\s+(on|off|data)$/); - my $which = $mc->arg(1); - - if ($which eq 'on') { - if (Danga::Socket->EnableProfiling) { - return $mc->ok; - } else { - return $mc->err('Unable to enable profiling. Please ensure you have the BSD::Resource module installed.'); - } - } - - if ($which eq 'off') { - Danga::Socket->DisableProfiling; - return $mc->ok; - } - - if ($which eq 'data') { - my $href = Danga::Socket->ProfilingData; - foreach my $key (sort keys %$href) { - my ($utime, $stime, $calls) = @{$href->{$key}}; - $mc->out(sprintf("%s %0.5f %0.5f %d %0.7f %0.7f", - $key, $utime, $stime, $calls, $utime / $calls, $stime / $calls)); - } - $mc->end; - } -} - -sub MANAGE_uptime { - my $mc = shift->no_opts; - - $mc->out("starttime $starttime"); - $mc->out("uptime " . 
(time() - $starttime)); - $mc->out("version $Perlbal::VERSION"); - $mc->end; -} - -*MANAGE_version = \&MANAGE_uptime; - -sub MANAGE_track { - my $mc = shift->no_opts; - - my $now = time(); - my @list; - foreach (keys %ObjTrack) { - my $age = $now - $ObjTrack{$_}->[0]; - push @list, [ $age, "${age}s $_: $ObjTrack{$_}->[1]" ]; - } - - # now output based on sorted age - foreach (sort { $a->[0] <=> $b->[0] } @list) { - $mc->out($_->[1]); - } - $mc->end; -} - -sub MANAGE_socks { - my $mc = shift->parse(qr/^socks(?: (\w+))?$/); - my $mode = $mc->arg(1) || "all"; - - my $sf = Perlbal::Socket->get_sock_ref; - - if ($mode eq "summary") { - my %count; - my $write_buf = 0; - my $open_files = 0; - while (my $k = each %$sf) { - my Perlbal::Socket $v = $sf->{$k}; - $count{ref $v}++; - $write_buf += $v->{write_buf_size}; - if ($v->isa("Perlbal::ClientHTTPBase")) { - my Perlbal::ClientHTTPBase $cv = $v; - $open_files++ if $cv->{'reproxy_fh'}; - } - } - - foreach (sort keys %count) { - $mc->out(sprintf("%5d $_", $count{$_})); - } - $mc->out(); - $mc->out(sprintf("Aggregate write buffer: %.1fk", $write_buf / 1024)); - $mc->out(sprintf(" Open files: %d", $open_files)); - } elsif ($mode eq "all") { - my $now = time; - $mc->out(sprintf("%5s %6s", "fd", "age")); - foreach (sort { $a <=> $b } keys %$sf) { - my $sock = $sf->{$_}; - my $age; - eval { - $age = $now - $sock->{create_time}; - }; - $age ||= 0; - $mc->out(sprintf("%5d %5ds %s", $_, $age, $sock->as_string)); - } - } - $mc->end; -} - -sub MANAGE_backends { - my $mc = shift->no_opts; - - my $sf = Perlbal::Socket->get_sock_ref; - my %nodes; # { "Backend" => int count } - foreach my $sock (values %$sf) { - if ($sock->isa("Perlbal::BackendHTTP")) { - my Perlbal::BackendHTTP $cv = $sock; - $nodes{"$cv->{ipport}"}++; - } - } - - # now print out text - foreach my $node (sort keys %nodes) { - $mc->out("$node " . 
$nodes{$node}); - } - - $mc->end; -} - -sub MANAGE_noverify { - my $mc = shift->no_opts; - - # shows the amount of time left for each node marked as noverify - my $now = time; - foreach my $ipport (keys %Perlbal::BackendHTTP::NoVerify) { - my $until = $Perlbal::BackendHTTP::NoVerify{$ipport} - $now; - $mc->out("$ipport $until"); - } - $mc->end; -} - -sub MANAGE_pending { - my $mc = shift->no_opts; - - # shows pending backend connections by service, node, and age - my %pend; # { "service" => { "ip:port" => age } } - my $now = time; - - foreach my $svc (values %service) { - foreach my $ipport (keys %{$svc->{pending_connects}}) { - my Perlbal::BackendHTTP $be = $svc->{pending_connects}->{$ipport}; - next unless defined $be; - $pend{$svc->{name}}->{$ipport} = $now - $be->{create_time}; - } - } - - foreach my $name (sort keys %pend) { - foreach my $ipport (sort keys %{$pend{$name}}) { - $mc->out("$name $ipport $pend{$name}{$ipport}"); - } - } - $mc->end; -} - -sub MANAGE_states { - my $mc = shift->parse(qr/^states(?:\s+(.+))?$/); - - my $svc; - if (defined $mc->arg(1)) { - $svc = $service{$mc->arg(1)}; - return $mc->err("Service not found.") - unless defined $svc; - } - - my $sf = Perlbal::Socket->get_sock_ref; - - my %states; # { "Class" => { "State" => int count; } } - foreach my $sock (values %$sf) { - next unless $sock->can('state'); - my $state = $sock->state; - next unless defined $state; - if (defined $svc) { - next unless $sock->isa('Perlbal::ClientProxy') || - $sock->isa('Perlbal::BackendHTTP') || - $sock->isa('Perlbal::ClientHTTP'); - next unless $sock->{service} == $svc; - } - $states{ref $sock}->{$state}++; - } - - # now print out text - foreach my $class (sort keys %states) { - foreach my $state (sort keys %{$states{$class}}) { - $mc->out("$class $state " . 
$states{$class}->{$state}); - } - } - $mc->end; -} - -sub MANAGE_queues { - my $mc = shift->no_opts; - my $now = time; - - foreach my $svc (values %service) { - next unless $svc->{role} eq 'reverse_proxy'; - - my %queues = ( - normal => 'waiting_clients', - highpri => 'waiting_clients_highpri', - lowpri => 'waiting_clients_lowpri', - ); - - while (my ($queue_name, $clients_key) = each %queues) { - my $age = 0; - my $count = @{$svc->{$clients_key}}; - my Perlbal::ClientProxy $oldest = $svc->{$clients_key}->[0]; - $age = $now - $oldest->{last_request_time} if defined $oldest; - $mc->out("$svc->{name}-$queue_name.age $age"); - $mc->out("$svc->{name}-$queue_name.count $count"); - } - } - $mc->end; -} - -sub MANAGE_state { - my $mc = shift->parse(qr/^state changes$/); - my $hr = Perlbal::Socket->get_statechange_ref; - my %final; # { "state" => count } - while (my ($obj, $arref) = each %$hr) { - $mc->out("$obj: " . join(', ', @$arref)); - $final{$arref->[-1]}++; - } - foreach my $k (sort keys %final) { - $mc->out("$k $final{$k}"); - } - $mc->end; -} - -sub MANAGE_leaks { - my $mc = shift->parse(qr/^leaks(?:\s+(.+))?$/); - return $mc->err("command disabled without \$ENV{PERLBAL_DEBUG} set") - unless $ENV{PERLBAL_DEBUG}; - - my $what = $mc->arg(1); - - # iterates over active objects. if you specify an argument, it is treated as code - # with $_ being the reference to the object. - # shows objects that we think might have been leaked - my $ref = Perlbal::Socket::get_created_objects_ref; - foreach (@$ref) { - next unless $_; # might be undef! 
- if ($what) { - my $rv = eval "$what"; - return $mc->err("$@") if $@; - next unless defined $rv; - $mc->out($rv); - } else { - $mc->out($_->as_string); - } - } - $mc->end; -} - -sub MANAGE_show { - my $mc = shift; - - if ($mc->cmd =~ /^show service (\w+)$/) { - my $sname = $1; - my Perlbal::Service $svc = $service{$sname}; - return $mc->err("Unknown service") unless $svc; - $svc->stats_info($mc->out); - return $mc->end; - } - - if ($mc->cmd =~ /^show pool(?:\s+(\w+))?$/) { - my $pool = $1; - if ($pool) { - my $pl = $pool{$pool}; - return $mc->err("pool '$pool' does not exist") unless $pl; - - foreach my $node (@{ $pl->nodes }) { - my $ipport = "$node->[0]:$node->[1]"; - $mc->out($ipport . " " . $pl->node_used($ipport)); - } - } else { - foreach my $name (sort keys %pool) { - my Perlbal::Pool $pl = $pool{$name}; - $mc->out("$name nodes $pl->{node_count}"); - $mc->out("$name services $pl->{use_count}"); - } - } - return $mc->end; - } - - if ($mc->cmd =~ /^show service$/) { - foreach my $name (sort keys %service) { - my $svc = $service{$name}; - my $listen = $svc->{listen} || "not_listening"; - $mc->out("$name $listen " . ($svc->{enabled} ? 
"ENABLED" : "DISABLED")); - } - return $mc->end; - } - - return $mc->parse_error; -} - -sub MANAGE_server { - my $mc = shift->parse(qr/^server (\S+) ?= ?(.+)$/); - my ($key, $val) = ($mc->arg(1), $mc->arg(2)); - - if ($key =~ /^max_reproxy_connections(?:\((.+)\))?/) { - return $mc->err("Expected numeric parameter") unless $val =~ /^-?\d+$/; - my $hostip = $1; - if (defined $hostip) { - $Perlbal::ReproxyManager::ReproxyMax{$hostip} = $val+0; - } else { - $Perlbal::ReproxyManager::ReproxyGlobalMax = $val+0; - } - return $mc->ok; - } - - if ($key eq "max_connections") { - return $mc->err('This command is not available unless BSD::Resource is installed') unless $Perlbal::BSD_RESOURCE_AVAILABLE; - return $mc->err("Expected numeric parameter") unless $val =~ /^-?\d+$/; - my $rv = BSD::Resource::setrlimit(BSD::Resource::RLIMIT_NOFILE(), $val, $val); - unless (defined $rv && $rv) { - if ($> == 0) { - $mc->err("Unable to set limit."); - } else { - $mc->err("Need to be root to increase max connections."); - } - } - return $mc->ok; - } - - if ($key eq "nice_level") { - return $mc->err("Expected numeric parameter") unless $val =~ /^-?\d+$/; - my $rv = POSIX::nice($val); - $mc->err("Unable to renice: $!") - unless defined $rv; - return $mc->ok; - } - - if ($key eq "aio_mode") { - return $mc->err("Unknown AIO mode") unless $val =~ /^none|linux|ioaio$/; - return $mc->err("Linux::AIO no longer supported") if $val eq "linux"; - return $mc->err("IO::AIO not available") if $val eq "ioaio" && ! 
$Perlbal::OPTMOD_IO_AIO; - $Perlbal::AIO_MODE = $val; - return $mc->ok; - } - - if ($key eq "aio_threads") { - return $mc->err("Expected numeric parameter") unless $val =~ /^-?\d+$/; - IO::AIO::min_parallel($val) - if $Perlbal::OPTMOD_IO_AIO; - return $mc->ok; - } - - if ($key eq "track_obj") { - return $mc->err("Expected 1 or 0") unless $val eq '1' || $val eq '0'; - $track_obj = $val + 0; - %ObjTrack = () if $val; # if we're turning it on, clear it out - return $mc->ok; - } - - if ($key eq "pidfile") { - return $mc->err("pidfile must be configured at startup, before Perlbal::run is called") if $run_started; - return $mc->err("Expected full pathname to pidfile") unless $val; - $pidfile = $val; - return $mc->ok; - } - - if ($key eq "crash_backtrace") { - return $mc->err("Expected 1 or 0") unless $val eq '1' || $val eq '0'; - if ($val) { - $SIG{__DIE__} = sub { Carp::confess(@_) }; - } else { - $SIG{__DIE__} = undef; - } - return $mc->ok; - } - - return $mc->err("unknown server option '$val'"); -} - -sub MANAGE_dumpconfig { - my $mc = shift; - - while (my ($name, $pool) = each %pool) { - $mc->out("CREATE POOL $name"); - - if ($pool->can("dumpconfig")) { - foreach my $line ($pool->dumpconfig) { - $mc->out(" $line"); - } - } else { - my $class = ref($pool); - $mc->out(" # Pool class '$class' is unable to dump config."); - } - } continue { - $mc->out(""); - } - - while (my ($name, $service) = each %service) { - $mc->out("CREATE SERVICE $name"); - - if ($service->can("dumpconfig")) { - foreach my $line ($service->dumpconfig) { - $mc->out(" $line"); - } - } else { - my $class = ref($service); - $mc->out(" # Service class '$class' is unable to dump config."); - } - - my $state = $service->{enabled} ? 
"ENABLE" : "DISABLE"; - $mc->out("$state $name"); - } continue { - $mc->out(""); - } - - return $mc->ok -} - -sub MANAGE_reproxy_state { - my $mc = shift; - Perlbal::ReproxyManager::dump_state($mc->out); - return 1; -} - -sub MANAGE_create { - my $mc = shift->parse(qr/^create (service|pool) (\w+)$/, - "usage: CREATE {service|pool} "); - my ($what, $name) = $mc->args; - - if ($what eq "service") { - return $mc->err("service '$name' already exists") if $service{$name}; - return $mc->err("pool '$name' already exists") if $pool{$name}; - Perlbal->create_service($name); - $mc->{ctx}{last_created} = $name; - return $mc->ok; - } - - if ($what eq "pool") { - return $mc->err("pool '$name' already exists") if $pool{$name}; - return $mc->err("service '$name' already exists") if $service{$name}; - $vivify_pools = 0; - $pool{$name} = Perlbal::Pool->new($name); - $mc->{ctx}{last_created} = $name; - return $mc->ok; - } -} - -sub MANAGE_use { - my $mc = shift->parse(qr/^use (\w+)$/, - "usage: USE "); - my ($name) = $mc->args; - return $mc->err("Non-existent pool or service '$name'") unless $pool{$name} || $service{$name}; - - $mc->{ctx}{last_created} = $name; - return $mc->ok; -} - -sub MANAGE_pool { - my $mc = shift->parse(qr/^pool (\w+) (\w+) (\d+.\d+.\d+.\d+)(?::(\d+))?$/); - my ($cmd, $name, $ip, $port) = $mc->args; - $port ||= 80; - - my $good_cmd = qr/^(?:add|remove)$/; - - # "add" and "remove" can be in either order - ($cmd, $name) = ($name, $cmd) if $name =~ /$good_cmd/; - return $mc->err("Invalid command: must be 'add' or 'remove'") - unless $cmd =~ /$good_cmd/; - - my $pl = $pool{$name}; - return $mc->err("Pool '$name' not found") unless $pl; - $pl->$cmd($ip, $port); - return $mc->ok; -} - -sub MANAGE_set { - my $mc = shift->parse(qr/^set (?:(\w+)[\. 
])?([\w\.]+) ?= ?(.+)$/, - "usage: SET [] = "); - my ($name, $key, $val) = $mc->args; - unless ($name ||= $mc->{ctx}{last_created}) { - return $mc->err("omitted service/pool name not implied from context"); - } - - if (my Perlbal::Service $svc = $service{$name}) { - return $svc->set($key, $val, $mc); - } elsif (my Perlbal::Pool $pl = $pool{$name}) { - return $pl->set($key, $val, $mc); - } - return $mc->err("service/pool '$name' does not exist"); -} - - -sub MANAGE_header { - my $mc = shift->parse(qr/^header\s+(\w+)\s+(insert|remove)\s+(.+?)(?:\s*:\s*(.+))?$/i, - "Usage: HEADER {INSERT|REMOVE}
[: ]"); - - my ($svc_name, $action, $header, $val) = $mc->args; - my $svc = $service{$svc_name}; - return $mc->err("service '$svc_name' does not exist") unless $svc; - return $svc->header_management($action, $header, $val, $mc); -} - -sub MANAGE_enable { - my $mc = shift->parse(qr/^(disable|enable) (\w+)$/, - "Usage: {ENABLE|DISABLE} "); - my ($verb, $name) = $mc->args; - my $svc = $service{$name}; - return $mc->err("service '$name' does not exist") unless $svc; - return $svc->$verb($mc); -} -*MANAGE_disable = \&MANAGE_enable; - -sub MANAGE_unload { - my $mc = shift->parse(qr/^unload (\w+)$/); - my ($fn) = $mc->args; - $fn = $PluginCase{lc $fn}; - my $rv = eval "Perlbal::Plugin::$fn->unload; 1;"; - $plugins{$fn} = 0; - return $mc->ok; -} - - -sub MANAGE_load { - my $mc = shift->parse(qr/^load \w+$/); - - my $fn; - $fn = $1 if $mc->orig =~ /^load (\w+)$/i; - - my $last_case; - my $last_class; - - my $load = sub { - my $name = shift; - $last_case = $name; - my $class = $last_class = "Perlbal::Plugin::$name"; - my $rv = eval "use $class; $class->load; 1;"; - return $mc->err($@) if ! $rv && $@ !~ /^Can\'t locate/; - return $rv; - }; - - my $rv = $load->($fn) || $load->(lc $fn) || $load->(ucfirst lc $fn); - return $mc->err($@) unless $rv; - - $PluginCase{lc $fn} = $last_case; - $plugins{$last_case} = $last_class; - - return $mc->ok; -} - -sub MANAGE_reload { - my $mc = shift->parse(qr/^reload (\w+)$/); - my ($fn) = $mc->args; - - my $class = $PluginCase{lc $fn} or - return $mc->err("Unknown/unloaded plugin '$fn'"); - $class = "Perlbal::Plugin::$class"; - - eval "$class->can_reload" or - return $mc->err("Plugin $class doesn't support reloading"); - - if ($class->can("pre_reload_unload")) { - eval "$class->pre_reload_unload; 1" or - return $mc->err("Error running $class->pre_reload_unload: $@"); - } - - eval "$class->unload; 1;" or - return $mc->err("Failed to unload $class: $@"); - - my $file = $class . 
".pm"; - $file =~ s!::!/!g; - - delete $INC{$file} or - die $mc->err("Didn't find $file in %INC"); - - no warnings 'redefine'; - eval "use $class; $class->load; 1;" or - return $mc->err("Failed to reload: $@"); - - return $mc->ok; -} - -sub MANAGE_plugins { - my $mc = shift->no_opts; - foreach my $svc (values %service) { - next unless @{$svc->{plugin_order}}; - $mc->out(join(' ', $svc->{name}, @{$svc->{plugin_order}})); - } - $mc->end; -} - -sub MANAGE_help { - my $mc = shift->no_opts; - my @commands = sort map { m/^MANAGE_(\S+)$/ ? $1 : () } - keys %Perlbal::; - foreach my $command (@commands) { - $mc->out("$command"); - } - $mc->end; -} - -sub MANAGE_aio { - my $mc = shift->no_opts; - my $stats = Perlbal::AIO::get_aio_stats(); - foreach my $c (sort keys %$stats) { - my $r = $stats->{$c}; - foreach my $k (keys %$r) { - $mc->out("$c $k $r->{$k}"); - } - } - $mc->end; -} - -sub load_config { - my ($file, $writer) = @_; - open (my $fh, $file) or die "Error opening config file ($file): $!\n"; - my $ctx = Perlbal::CommandContext->new; - $ctx->verbose(0); - while (my $line = <$fh>) { - return 0 unless run_manage_command($line, $writer, $ctx); - } - close($fh); - return 1; -} - -sub daemonize { - my($pid, $sess_id, $i); - - # note that we're not in the foreground (for logging purposes) - $foreground = 0; - - # required before fork: (as of old Linux::AIO 1.1, still true?) 
- IO::AIO::max_parallel(0) - if $Perlbal::OPTMOD_IO_AIO; - - ## Fork and exit parent - if ($pid = fork) { exit 0; } - - ## Detach ourselves from the terminal - croak "Cannot detach from controlling terminal" - unless $sess_id = POSIX::setsid(); - - ## Prevent possibility of acquiring a controlling terminal - $SIG{'HUP'} = 'IGNORE'; - if ($pid = fork) { exit 0; } - - ## Change working directory - chdir "/"; - - ## Clear file creation mask - umask 0; - - ## Close open file descriptors - close(STDIN); - close(STDOUT); - close(STDERR); - - ## Reopen stderr, stdout, stdin to /dev/null - open(STDIN, "+>/dev/null"); - open(STDOUT, "+>&STDIN"); - open(STDERR, "+>&STDIN"); -} - -# For other apps using Danga::Socket that want to embed Perlbal, this can be called -# directly to start it up. You can call this as many times as you like; it'll -# only actually do what it does the first time it's called. -sub initialize { - unless ($run_started) { - $run_started = 1; - - # number of AIO threads. the number of outstanding requests isn't - # affected by this - IO::AIO::min_parallel(3) if $Perlbal::OPTMOD_IO_AIO; - - # register IO::AIO pipe which gets written to from threads - # doing blocking IO - if ($Perlbal::OPTMOD_IO_AIO) { - Perlbal::Socket->AddOtherFds(IO::AIO::poll_fileno() => - \&IO::AIO::poll_cb); - } - - # The fact that this only runs the first time someone calls initialize() - # means that some things which depend on it might be unreliable when - # used in an embedded perlbal if there is a race for multiple components - # to call initialize(). - run_global_hook("pre_event_loop"); - } -} - -# This is the function to call if you want Perlbal to be in charge of the event loop. -# It won't return until Perlbal is somehow told to exit. 
-sub run { - - # setup for logging - Sys::Syslog::openlog('perlbal', 'pid', 'daemon') if $Perlbal::SYSLOG_AVAILABLE; - $Perlbal::syslog_open = 1; - Perlbal::log('info', 'beginning run'); - my $pidfile_written = 0; - $pidfile_written = _write_pidfile( $pidfile ) if $pidfile; - - Perlbal::initialize(); - - Danga::Socket->SetLoopTimeout(1000); - Danga::Socket->SetPostLoopCallback(sub { - $Perlbal::tick_time = time(); - Perlbal::Socket::run_callbacks(); - return 1; - }); - - # begin the overall loop to try to capture if Perlbal dies at some point - # so we can have a log of it - eval { - # wait for activity - Perlbal::Socket->EventLoop(); - }; - - my $clean_exit = 1; - - # closing messages - if ($@) { - Perlbal::log('crit', "crash log: $_") foreach split(/\r?\n/, $@); - $clean_exit = 0; - } - - # Note: This will only actually remove the pidfile on 'shutdown graceful' - # A more reliable approach might be to have a pidfile object which fires - # removal on DESTROY. - _remove_pidfile( $pidfile ) if $pidfile_written; - - Perlbal::log('info', 'ending run'); - $Perlbal::syslog_open = 0; - Sys::Syslog::closelog() if $Perlbal::SYSLOG_AVAILABLE; - - return $clean_exit; -} - -sub log { - # simple logging functionality - if ($foreground) { - # syslog acts like printf so we have to use printf and append a \n - shift; # ignore the first parameter (info, warn, crit, etc) - printf(shift(@_) . "\n", @_); - } else { - # just pass the parameters to syslog - Sys::Syslog::syslog(@_) if $Perlbal::syslog_open; - } -} - - -sub _write_pidfile { - my $file = shift; - - my $fh; - unless (open($fh, ">$file")) { - Perlbal::log('info', "couldn't create pidfile '$file': $!" ); - return 0; - } - unless ((print $fh "$$\n") && close($fh)) { - Perlbal::log('info', "couldn't write into pidfile '$file': $!" 
); - _remove_pidfile($file); - return 0; - } - return 1; -} - - -sub _remove_pidfile { - my $file = shift; - - unlink $file; - return 1; -} - - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: - -1; diff --git a/lib/mogdeps/Perlbal/AIO.pm b/lib/mogdeps/Perlbal/AIO.pm deleted file mode 100644 index fac72abb..00000000 --- a/lib/mogdeps/Perlbal/AIO.pm +++ /dev/null @@ -1,284 +0,0 @@ -# AIO abstraction layer -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. - -package Perlbal::AIO; - -use strict; -use POSIX qw(ENOENT EACCES EBADF); -use Fcntl qw(SEEK_CUR SEEK_SET SEEK_END O_RDWR O_CREAT O_TRUNC); - -# Try and use IO::AIO, if it's around. -BEGIN { - $Perlbal::OPTMOD_IO_AIO = eval "use IO::AIO 1.6 (); 1;"; -} - -END { - IO::AIO::max_parallel(0) - if $Perlbal::OPTMOD_IO_AIO; -} - -$Perlbal::AIO_MODE = "none"; -$Perlbal::AIO_MODE = "ioaio" if $Perlbal::OPTMOD_IO_AIO; - -############################################################################ -# AIO functions available to callers -############################################################################ - -sub aio_readahead { - my ($fh, $offset, $length, $user_cb) = @_; - - aio_channel_push(get_chan(), $user_cb, sub { - my $cb = shift; - if ($Perlbal::AIO_MODE eq "ioaio") { - IO::AIO::aio_readahead($fh, $offset, $length, $cb); - } else { - $cb->(); - } - }); -} - -sub aio_stat { - my ($file, $user_cb) = @_; - - aio_channel_push(get_chan($file), $user_cb, sub { - my $cb = shift; - if ($Perlbal::AIO_MODE eq "ioaio") { - IO::AIO::aio_stat($file, $cb); - } else { - stat($file); - $cb->(); - } - }); -} - -sub aio_open { - my ($file, $flags, $mode, $user_cb) = @_; - - aio_channel_push(get_chan($file), $user_cb, sub { - my $cb = shift; - - if ($Perlbal::AIO_MODE eq "ioaio") { - IO::AIO::aio_open($file, $flags, $mode, $cb); - } else { - my $fh; - my $rv = sysopen($fh, $file, $flags, $mode); - $cb->($rv ? 
$fh : undef); - } - }); -} - -sub aio_unlink { - my ($file, $user_cb) = @_; - aio_channel_push(get_chan($file), $user_cb, sub { - my $cb = shift; - - if ($Perlbal::AIO_MODE eq "ioaio") { - IO::AIO::aio_unlink($file, $cb); - } else { - my $rv = unlink($file); - $rv = $rv ? 0 : -1; - $cb->($rv); - } - }); -} - -sub aio_write { - # 0 1 2 3(data) 4 - my ($fh, $offset, $length, undef, $user_cb) = @_; - return no_fh($user_cb) unless $fh; - my $alist = \@_; - - aio_channel_push(get_chan(), $user_cb, sub { - my $cb = shift; - if ($Perlbal::AIO_MODE eq "ioaio") { - IO::AIO::aio_write($fh, $offset, $length, $alist->[3], 0, $cb); - } else { - my $old_off = sysseek($fh, 0, SEEK_CUR); - sysseek($fh, $offset, 0); - my $rv = syswrite($fh, $alist->[3], $length, 0); - sysseek($fh, $old_off, SEEK_SET); - $cb->($rv); - } - }); -} - -sub aio_read { - # 0 1 2 3(data) 4 - my ($fh, $offset, $length, undef, $user_cb) = @_; - return no_fh($user_cb) unless $fh; - my $alist = \@_; - - aio_channel_push(get_chan(), $user_cb, sub { - my $cb = shift; - if ($Perlbal::AIO_MODE eq "ioaio") { - IO::AIO::aio_read($fh, $offset, $length, $alist->[3], 0, $cb); - } else { - my $old_off = sysseek($fh, 0, SEEK_CUR); - sysseek($fh, $offset, 0); - my $rv = sysread($fh, $alist->[3], $length, 0); - sysseek($fh, $old_off, SEEK_SET); - $cb->($rv); - } - }); -} - -############################################################################ -# AIO channel stuff -# prevents all AIO threads from being consumed by requests for same -# failing/overloaded disk by isolating them into separate 'channels' in -# parent process and not dispatching more than the max in-flight count -# allows. think of a channel as a named queue. or in reality, a disk. -############################################################################ - -my %chan_outstanding; # $channel_name -> $num_in_flight -my %chan_pending; # $channel_name -> [ [$subref, $cb], .... 
] -my %chan_hitmaxdepth; # $channel_name -> $times_enqueued (not dispatched immediately) -my %chan_submitct; # $channel_name -> $times_submitted (total AIO requests for this channel) -my $use_aio_chans = 0; # keep them off for now, until mogstored code is ready to use them -my $file_to_chan_hook; # coderef that returns $chan_name given a $filename - -my %chan_concurrency; # $channel_name -> concurrency per channel - # (cache. definitive version via function call) - -sub get_aio_stats { - my $ret = {}; - foreach my $c (keys %chan_outstanding) { - $ret->{$c} = { - cur_running => $chan_outstanding{$c}, - ctr_queued => $chan_hitmaxdepth{$c} || 0, - ctr_total => $chan_submitct{$c}, - }; - } - - foreach my $c (keys %chan_pending) { - my $rec = $ret->{$c} ||= {}; - $rec->{cur_queued} = scalar @{$chan_pending{$c}}; - } - - return $ret; -} - -# (external API). set trans hook, but also enables AIO channels. -sub set_file_to_chan_hook { - $file_to_chan_hook = shift; # coderef that returns $chan_name given a $filename - $use_aio_chans = 1; -} - -# internal API: -sub aio_channel_push { - my ($chan, $user_cb, $action) = @_; - - # if we were to do it immediately, bypassing AIO channels (future option?) - unless ($use_aio_chans) { - $action->($user_cb); - return; - } - - # IO::AIO/etc only take one callback. so we wrap the user - # (caller) function with our own that first calls theirs, then - # does our bookkeeping and queue management afterwards. - my $wrapped_cb = sub { - $user_cb->(@_); - $chan_outstanding{$chan}--; - aio_channel_cond_run($chan); - }; - - # in case this is the first time this queue has been used, init stuff: - my $chanpend = ($chan_pending{$chan} ||= []); - $chan_outstanding{$chan} ||= 0; - $chan_submitct{$chan}++; - - my $max_out = $chan_concurrency{$chan} ||= aio_chan_max_concurrent($chan); - - if ($chan_outstanding{$chan} < $max_out) { - $chan_outstanding{$chan}++; - $action->($wrapped_cb); - return; - } else { - # too deep. enqueue. 
- $chan_hitmaxdepth{$chan}++; - push @$chanpend, [$action, $wrapped_cb]; - } -} - -sub aio_chan_max_concurrent { - my ($chan) = @_; - return 100 if $chan eq '[default]'; - return 10; -} - -sub aio_channel_cond_run { - my ($chan) = @_; - - my $chanpend = $chan_pending{$chan} or return; - my $max_out = $chan_concurrency{$chan} ||= aio_chan_max_concurrent($chan); - - my $job; - while ($chan_outstanding{$chan} < $max_out && ($job = shift @$chanpend)) { - $chan_outstanding{$chan}++; - $job->[0]->($job->[1]); - } -} - -my $next_chan; -sub set_channel { - $next_chan = shift; -} - -sub set_file_for_channel { - my ($file) = @_; - if ($file_to_chan_hook) { - $next_chan = $file_to_chan_hook->($file); - } else { - $next_chan = undef; - } -} - -# gets currently-set channel, then clears it. or if none set, -# lets registered hook set the channel name from the optional -# $file parameter. the default channel, '[default]' has no limits -sub get_chan { - return undef unless $use_aio_chans; - my ($file) = @_; - set_file_for_channel($file) if $file; - - if (my $chan = $next_chan) { - $next_chan = undef; - return $chan; - } - - return "[default]"; -} - -############################################################################ -# misc util functions -############################################################################ - -sub _fh_of_fd_mode { - my ($fd, $mode) = @_; - return undef unless defined $fd && $fd >= 0; - - #TODO: use the write MODE for the given $mode; - my $fh = IO::Handle->new_from_fd($fd, 'r+'); - my $num = fileno($fh); - return $fh; -} - -sub no_fh { - my $cb = shift; - - my $i = 1; - my $stack_trace = ""; - while (my ($pkg, $filename, $line, $subroutine, $hasargs, - $wantarray, $evaltext, $is_require, $hints, $bitmask) = caller($i++)) { - $stack_trace .= " at $filename:$line $subroutine\n"; - } - - Perlbal::log("crit", "Undef \$fh: $stack_trace"); - $cb->(undef); - return undef; -} - -1; diff --git a/lib/mogdeps/Perlbal/BackendHTTP.pm 
b/lib/mogdeps/Perlbal/BackendHTTP.pm deleted file mode 100644 index f3d0b382..00000000 --- a/lib/mogdeps/Perlbal/BackendHTTP.pm +++ /dev/null @@ -1,755 +0,0 @@ -###################################################################### -# HTTP connection to backend node -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. -# - -package Perlbal::BackendHTTP; -use strict; -use warnings; -no warnings qw(deprecated); - -use base "Perlbal::Socket"; -use fields ('client', # Perlbal::ClientProxy connection, or undef - 'service', # Perlbal::Service - 'pool', # Perlbal::Pool; whatever pool we spawned from - 'ip', # IP scalar - 'port', # port scalar - 'ipport', # "$ip:$port" - 'reportto', # object; must implement reporter interface - - 'has_attention', # has been accepted by a webserver and - # we know for sure we're not just talking - # to the TCP stack - - 'waiting_options', # if true, we're waiting for an OPTIONS * - # response to determine when we have attention - - 'disconnect_at', # time this connection will be disconnected, - # if it's kept-alive and backend told us. - # otherwise undef for unknown. - - # The following only apply when the backend server sends - # a content-length header - 'content_length', # length of document being transferred - 'content_length_remain', # bytes remaining to be read - - 'use_count', # number of requests this backend's been used for - 'generation', # int; counts what generation we were spawned in - 'buffered_upload_mode', # bool; if on, we're doing a buffered upload transmit - - 'scratch' # for plugins - ); -use Socket qw(PF_INET IPPROTO_TCP SOCK_STREAM SOL_SOCKET SO_ERROR - AF_UNIX PF_UNSPEC - ); -use IO::Handle; - -use Perlbal::ClientProxy; - -# if this is made too big, (say, 128k), then perl does malloc instead -# of using its slab cache. 
-use constant BACKEND_READ_SIZE => 61449; # 60k, to fit in a 64k slab - -# keys set here when an endpoint is found to not support persistent -# connections and/or the OPTIONS method -our %NoVerify; # { "ip:port" => next-verify-time } -our %NodeStats; # { "ip:port" => { ... } }; keep statistics about nodes - -# constructor for a backend connection takes a service (pool) that it's -# for, and uses that service to get its backend IP/port, as well as the -# client that will be using this backend connection. final parameter is -# an options hashref that contains some options: -# reportto => object obeying reportto interface -sub new { - my Perlbal::BackendHTTP $self = shift; - my ($svc, $ip, $port, $opts) = @_; - $opts ||= {}; - - my $sock; - socket $sock, PF_INET, SOCK_STREAM, IPPROTO_TCP; - - unless ($sock && defined fileno($sock)) { - Perlbal::log('crit', "Error creating socket: $!"); - return undef; - } - my $inet_aton = Socket::inet_aton($ip); - unless ($inet_aton) { - Perlbal::log('crit', "inet_aton failed creating socket for $ip"); - return undef; - } - - IO::Handle::blocking($sock, 0); - connect $sock, Socket::sockaddr_in($port, $inet_aton); - - $self = fields::new($self) unless ref $self; - $self->SUPER::new($sock); - - Perlbal::objctor($self); - - $self->{ip} = $ip; # backend IP - $self->{port} = $port; # backend port - $self->{ipport} = "$ip:$port"; # often used as key - $self->{service} = $svc; # the service we're serving for - $self->{pool} = $opts->{pool}; # what pool we came from. 
- $self->{reportto} = $opts->{reportto} || $svc; # reportto if specified - $self->state("connecting"); - - # mark another connection to this ip:port - $NodeStats{$self->{ipport}}->{attempts}++; - $NodeStats{$self->{ipport}}->{lastattempt} = $self->{create_time}; - - # setup callback in case we get stuck in connecting land - Perlbal::Socket::register_callback(15, sub { - if ($self->state eq 'connecting' || $self->state eq 'verifying_backend') { - # shouldn't still be connecting/verifying ~15 seconds after create - $self->close('callback_timeout'); - } - return 0; - }); - - # for header reading: - $self->init; - - $self->watch_write(1); - return $self; -} - -sub init { - my $self = shift; - $self->{req_headers} = undef; - $self->{res_headers} = undef; # defined w/ headers object once all headers in - $self->{headers_string} = ""; # blank to start - $self->{generation} = $self->{service}->{generation}; - $self->{read_size} = 0; # total bytes read from client - - $self->{client} = undef; # Perlbal::ClientProxy object, initially empty - # until we ask our service for one - - $self->{has_attention} = 0; - $self->{use_count} = 0; - $self->{buffered_upload_mode} = 0; -} - - -sub new_process { - my ($class, $svc, $prog) = @_; - - my ($psock, $csock); - socketpair($csock, $psock, AF_UNIX, SOCK_STREAM, PF_UNSPEC) - or die "socketpair: $!"; - - $csock->autoflush(1); - $psock->autoflush(1); - - my $pid = fork; - unless (defined $pid) { - warn "fork failed: $!\n"; - return undef; - } - - # child process - unless ($pid) { - close(STDIN); - close(STDOUT); - #close(STDERR); - open(STDIN, '<&', $psock); - open(STDOUT, '>&', $psock); - #open(STDERR, ">/dev/null"); - exec $prog; - } - - close($psock); - my $sock = $csock; - - my $self = fields::new($class); - $self->SUPER::new($sock); - Perlbal::objctor($self); - - $self->{ipport} = $prog; # often used as key - $self->{service} = $svc; # the service we're serving for - $self->{reportto} = $svc; # reportto interface (same as service) - 
$self->state("connecting"); - - $self->init; - $self->watch_write(1); - return $self; -} - -sub close { - my Perlbal::BackendHTTP $self = shift; - - # OSX Gives EPIPE on bad connects, and doesn't fail the connect - # so lets treat EPIPE as a event_err so the logic there does - # the right thing - if (defined $_[0] && $_[0] eq 'EPIPE') { - $self->event_err; - return; - } - - # don't close twice - return if $self->{closed}; - - # this closes the socket and sets our closed flag - $self->SUPER::close(@_); - - # tell our client that we're gone - if (my $client = $self->{client}) { - $client->backend(undef); - $self->{client} = undef; - } - - # tell our owner that we're gone - if (my $reportto = $self->{reportto}) { - $reportto->note_backend_close($self); - $self->{reportto} = undef; - } -} - -# return our defined generation counter with no parameter, -# or set our generation if given a parameter -sub generation { - my Perlbal::BackendHTTP $self = $_[0]; - return $self->{generation} unless $_[1]; - return $self->{generation} = $_[1]; -} - -# return what ip and port combination we're using -sub ipport { - my Perlbal::BackendHTTP $self = $_[0]; - return $self->{ipport}; -} - -# called to tell backend that the client has gone on to do something else now. -sub forget_client { - my Perlbal::BackendHTTP $self = $_[0]; - $self->{client} = undef; -} - -# called by service when it's got a client for us, or by ourselves -# when we asked for a client. -# returns true if client assignment was accepted. 
-sub assign_client { - my Perlbal::BackendHTTP $self = shift; - my Perlbal::ClientProxy $client = shift; - return 0 if $self->{client}; - - my $svc = $self->{service}; - - # set our client, and the client's backend to us - $svc->mark_node_used($self->{ipport}); - $self->{client} = $client; - $self->state("sending_req"); - $self->{client}->backend($self); - - my Perlbal::HTTPHeaders $hds = $client->{req_headers}->clone; - $self->{req_headers} = $hds; - - my $client_ip = $client->peer_ip_string; - - # I think I've seen this be undef in practice. Double-check - unless ($client_ip) { - warn "Undef client_ip ($client) in assign_client. Closing."; - $client->close; - return 0; - } - - # Use HTTP/1.0 to backend (FIXME: use 1.1 and support chunking) - $hds->set_version("1.0"); - - my $persist = $svc->{persist_backend}; - - $hds->header("Connection", $persist ? "keep-alive" : "close"); - - if ($svc->{enable_reproxy}) { - $hds->header("X-Proxy-Capabilities", "reproxy-file"); - } - - # decide whether we trust the upstream or not, to give us useful - # forwarding info headers - if ($svc->trusted_ip($client_ip)) { - # yes, we trust our upstream, so just append our client's IP - # to the existing list of forwarded IPs, if we're a blind proxy - # then don't append our IP to the end of the list. 
- unless ($svc->{blind_proxy}) { - my @ips = split /,\s*/, ($hds->header("X-Forwarded-For") || ''); - $hds->header("X-Forwarded-For", join ", ", @ips, $client_ip); - } - } else { - # no, don't trust upstream (untrusted client), so remove all their - # forwarding headers and tag their IP as the x-forwarded-for - $hds->header("X-Forwarded-For", $client_ip); - $hds->header("X-Host", undef); - $hds->header("X-Forwarded-Host", undef); - } - - $self->tcp_cork(1); - $client->state('backend_req_sent'); - - $self->{content_length} = undef; - $self->{content_length_remain} = undef; - - # run hooks - return 1 if $svc->run_hook('backend_client_assigned', $self); - - # now cleanup the headers before we send to the backend - $svc->munge_headers($hds) if $svc; - - $self->write($hds->to_string_ref); - $self->write(sub { - $self->tcp_cork(0); - if (my $client = $self->{client}) { - # start waiting on a reply - $self->watch_read(1); - $self->state("wait_res"); - $client->state('wait_res'); - $client->backend_ready($self); - } - }); - - return 1; -} - -# called by ClientProxy after we tell it our backend is ready and -# it has an upload ready on disk -sub invoke_buffered_upload_mode { - my Perlbal::BackendHTTP $self = shift; - - # so, we're receiving a buffered upload, we need to go ahead and - # start the buffered upload retransmission to backend process. we - # have to turn watching for writes on, since that's what is doing - # the triggering, NOT the normal client proxy watch for read - $self->{buffered_upload_mode} = 1; - $self->watch_write(1); -} - -# Backend -sub event_write { - my Perlbal::BackendHTTP $self = shift; - print "Backend $self is writeable!\n" if Perlbal::DEBUG >= 2; - - my $now = time(); - delete $NoVerify{$self->{ipport}} if - defined $NoVerify{$self->{ipport}} && - $NoVerify{$self->{ipport}} < $now; - - if (! 
$self->{client} && $self->{state} eq "connecting") { - # not interested in writes again until something else is - $self->watch_write(0); - $NodeStats{$self->{ipport}}->{connects}++; - $NodeStats{$self->{ipport}}->{lastconnect} = $now; - - # OSX returns writeable even if the connect fails - # so explicitly check for the error - # TODO: make a smaller test case and show to the world - if (my $error = unpack('i', getsockopt($self->{sock}, SOL_SOCKET, SO_ERROR))) { - $self->event_err; - return; - } - - if (defined $self->{service} && $self->{service}->{verify_backend} && - !$self->{has_attention} && !defined $NoVerify{$self->{ipport}}) { - - return if $self->{service}->run_hook('backend_write_verify', $self); - - # the backend should be able to answer this incredibly quickly. - $self->write("OPTIONS " . $self->{service}->{verify_backend_path} . " HTTP/1.0\r\nConnection: keep-alive\r\n\r\n"); - $self->watch_read(1); - $self->{waiting_options} = 1; - $self->{content_length_remain} = undef; - $self->state("verifying_backend"); - } else { - # register our boredom (readiness for a client/request) - $self->state("bored"); - $self->{reportto}->register_boredom($self); - } - return; - } - - # if we have a client, and we're currently doing a buffered upload - # sendfile, then tell the client to continue sending us data - if ($self->{client} && $self->{buffered_upload_mode}) { - $self->{client}->continue_buffered_upload($self); - return; - } - - my $done = $self->write(undef); - $self->watch_write(0) if $done; -} - -sub verify_success { - my Perlbal::BackendHTTP $self = shift; - $self->{waiting_options} = 0; - $self->{has_attention} = 1; - $NodeStats{$self->{ipport}}->{verifies}++; - $self->next_request(1); # initial - return; -} - -sub verify_failure { - my Perlbal::BackendHTTP $self = shift; - $NoVerify{$self->{ipport}} = time() + 60; - $self->{reportto}->note_bad_backend_connect($self); - $self->close('no_keep_alive'); - return; -} - -sub event_read_waiting_options { # : void 
- my Perlbal::BackendHTTP $self = shift; - - if (defined $self->{service}) { - return if $self->{service}->run_hook('backend_readable_verify', $self); - } - - if ($self->{content_length_remain}) { - # the HTTP/1.1 spec says OPTIONS responses can have content-lengths, - # but the meaning of the response is reserved for a future spec. - # this just gobbles it up for. - my $bref = $self->read(BACKEND_READ_SIZE); - return $self->verify_failure unless defined $bref; - $self->{content_length_remain} -= length($$bref); - } elsif (my $hd = $self->read_response_headers) { - # see if we have keep alive support - return $self->verify_failure unless $hd->res_keep_alive_options; - $self->{content_length_remain} = $hd->header("Content-Length"); - } - - # if we've got the option response and read any response data - # if present: - if ($self->{res_headers} && ! $self->{content_length_remain}) { - $self->verify_success; - } - return; -} - -sub handle_response { # : void - my Perlbal::BackendHTTP $self = shift; - my Perlbal::HTTPHeaders $hd = $self->{res_headers}; - my Perlbal::ClientProxy $client = $self->{client}; - - print "BackendHTTP: handle_response\n" if Perlbal::DEBUG >= 2; - - my $res_code = $hd->response_code; - - # keep a rolling window of the last 500 response codes - my $ref = ($NodeStats{$self->{ipport}}->{responsecodes} ||= []); - push @$ref, $res_code; - if (scalar(@$ref) > 500) { - shift @$ref; - } - - # call service response received function - return if $self->{reportto}->backend_response_received($self); - - # standard handling - $self->state("xfer_res"); - $client->state("xfer_res"); - $self->{has_attention} = 1; - - # RFC 2616, Sec 4.4: Messages MUST NOT include both a - # Content-Length header field and a non-identity - # transfer-coding. If the message does include a non- - # identity transfer-coding, the Content-Length MUST be - # ignored. 
- my $te = $hd->header("Transfer-Encoding"); - if ($te && $te !~ /\bidentity\b/i) { - $hd->header("Content-Length", undef); - } - - my Perlbal::HTTPHeaders $rqhd = $self->{req_headers}; - - # setup our content length so we know how much data to expect, in general - # we want the content-length from the response, but if this was a head request - # we know it's a 0 length message the client wants - if ($rqhd->request_method eq 'HEAD') { - $self->{content_length} = 0; - } else { - $self->{content_length} = $hd->content_length; - } - $self->{content_length_remain} = $self->{content_length} || 0; - - my $reproxy_cache_for = $hd->header('X-REPROXY-CACHE-FOR') || 0; - - # special cases: reproxying and retrying after server errors: - if ((my $rep = $hd->header('X-REPROXY-FILE')) && $self->may_reproxy) { - # make the client begin the async IO while we move on - $self->next_request; - $client->start_reproxy_file($rep, $hd); - return; - } elsif ((my $urls = $hd->header('X-REPROXY-URL')) && $self->may_reproxy) { - $self->next_request; - $self->{service}->add_to_reproxy_url_cache($rqhd, $hd) - if $reproxy_cache_for; - $client->start_reproxy_uri($hd, $urls); - return; - } elsif ((my $svcname = $hd->header('X-REPROXY-SERVICE')) && $self->may_reproxy) { - $self->next_request; - $self->{client} = undef; - $client->start_reproxy_service($hd, $svcname); - return; - } elsif ($res_code == 500 && - $rqhd->request_method =~ /^GET|HEAD$/ && - $client->should_retry_after_500($self)) { - # eh, 500 errors are rare. just close and don't spend effort reading - # rest of body's error message to no client. 
- $self->close; - - # and tell the client to try again with a new backend - $client->retry_after_500($self->{service}); - return; - } - - # regular path: - my $res_source = $client->{primary_res_hdrs} || $hd; - my $thd = $client->{res_headers} = $res_source->clone; - - # setup_keepalive will set Connection: and Keep-Alive: headers for us - # as well as setup our HTTP version appropriately - $client->setup_keepalive($thd); - - # if we had an alternate primary response header, make sure - # we send the real content-length (from the reproxied URL) - # and not the one the first server gave us - if ($client->{primary_res_hdrs}) { - $thd->header('Content-Length', $hd->header('Content-Length')); - $thd->header('X-REPROXY-FILE', undef); - $thd->header('X-REPROXY-URL', undef); - $thd->header('X-REPROXY-EXPECTED-SIZE', undef); - $thd->header('X-REPROXY-CACHE-FOR', undef); - - # also update the response code, in case of 206 partial content - my $rescode = $hd->response_code; - if ($rescode == 206 || $rescode == 416) { - $thd->code($rescode); - $thd->header('Accept-Ranges', $hd->header('Accept-Ranges')) if $hd->header('Accept-Ranges'); - $thd->header('Content-Range', $hd->header('Content-Range')) if $hd->header('Content-Range'); - } - $thd->code(200) if $thd->response_code == 204; # upgrade HTTP No Content (204) to 200 OK. - } - - print " writing response headers to client\n" if Perlbal::DEBUG >= 3; - $client->write($thd->to_string_ref); - - print(" content_length=", (defined $self->{content_length} ? $self->{content_length} : "(undef)"), - " remain=", (defined $self->{content_length_remain} ? $self->{content_length_remain} : "(undef)"), "\n") - if Perlbal::DEBUG >= 3; - - if (defined $self->{content_length} && ! $self->{content_length_remain}) { - print " done. 
detaching.\n" if Perlbal::DEBUG >= 3; - # order important: next_request detaches us from client, so - # $client->close can't kill us - $self->next_request; - $client->write(sub { - $client->backend_finished; - }); - } -} - -sub may_reproxy { - my Perlbal::BackendHTTP $self = shift; - my Perlbal::Service $svc = $self->{service}; - return 0 unless $svc; - return $svc->{enable_reproxy}; -} - -# Backend -sub event_read { - my Perlbal::BackendHTTP $self = shift; - print "Backend $self is readable!\n" if Perlbal::DEBUG >= 2; - - return $self->event_read_waiting_options if $self->{waiting_options}; - - my Perlbal::ClientProxy $client = $self->{client}; - - # with persistent connections, sometimes we have a backend and - # no client, and backend becomes readable, either to signal - # to use the end of the stream, or because a bad request error, - # which I can't totally understand. in any case, we have - # no client so all we can do is close this backend. - return $self->close('read_with_no_client') unless $client; - - unless ($self->{res_headers}) { - return unless $self->read_response_headers; - return $self->handle_response; - } - - # if our client's behind more than the max limit, stop buffering - if ($client->too_far_behind_backend) { - $self->watch_read(0); - $client->{backend_stalled} = 1; - return; - } - - my $bref = $self->read(BACKEND_READ_SIZE); - - if (defined $bref) { - $client->write($bref); - - # HTTP/1.0 keep-alive support to backend. we just count bytes - # until we hit the end, then we know we can send another - # request on this connection - if ($self->{content_length}) { - $self->{content_length_remain} -= length($$bref); - if (! 
$self->{content_length_remain}) { - # order important: next_request detaches us from client, so - # $client->close can't kill us - $self->next_request; - $client->write(sub { $client->backend_finished; }); - } - } - return; - } else { - # backend closed - print "Backend $self is done; closing...\n" if Perlbal::DEBUG >= 1; - - $client->backend(undef); # disconnect ourselves from it - $self->{client} = undef; # .. and it from us - $self->close('backend_disconnect'); # close ourselves - - $client->write(sub { $client->backend_finished; }); - return; - } -} - -# if $initial is on, then don't increment use count -sub next_request { - my Perlbal::BackendHTTP $self = $_[0]; - my $initial = $_[1]; - - # don't allow this if we're closed - return if $self->{closed}; - - # set alive_time so reproxy can intelligently reuse this backend - my $now = time(); - $self->{alive_time} = $now; - $NodeStats{$self->{ipport}}->{requests}++ unless $initial; - $NodeStats{$self->{ipport}}->{lastresponse} = $now; - - my $hd = $self->{res_headers}; # response headers - - # verify that we have keep-alive support. by passing $initial to res_keep_alive, - # we signal that req_headers may be undef (if we just did an options request) - return $self->close('next_request_no_persist') - unless $hd->res_keep_alive($self->{req_headers}, $initial); - - # and now see if we should closed based on the pool we're from - return $self->close('pool_requested_closure') - if $self->{pool} && ! $self->{pool}->backend_should_live($self); - - # we've been used - $self->{use_count}++ unless $initial; - - # service specific - if (my Perlbal::Service $svc = $self->{service}) { - # keep track of how many times we've been used, and don't - # keep using this connection more times than the service - # is configured for. 
- if ($svc->{max_backend_uses} && ($self->{use_count} > $svc->{max_backend_uses})) { - return $self->close('exceeded_max_uses'); - } - } - - # if backend told us, keep track of when the backend - # says it's going to boot us, so we don't use it within - # a few seconds of that time - if (($hd->header("Keep-Alive") || '') =~ /\btimeout=(\d+)/i) { - $self->{disconnect_at} = $now + $1; - } else { - $self->{disconnect_at} = undef; - } - - $self->{client} = undef; - - $self->state("bored"); - $self->watch_write(0); - - $self->{req_headers} = undef; - $self->{res_headers} = undef; - $self->{headers_string} = ""; - $self->{req_headers} = undef; - - $self->{read_size} = 0; - $self->{content_length_remain} = undef; - $self->{content_length} = undef; - $self->{buffered_upload_mode} = 0; - - $self->{reportto}->register_boredom($self); - return; -} - -# Backend: bad connection to backend -sub event_err { - my Perlbal::BackendHTTP $self = shift; - - # FIXME: we get this after backend is done reading and we disconnect, - # hence the misc checks below for $self->{client}. - - print "BACKEND event_err\n" if - Perlbal::DEBUG >= 2; - - if ($self->{client}) { - # request already sent to backend, then an error occurred. - # we don't want to duplicate POST requests, so for now - # just fail - # TODO: if just a GET request, retry? 
- $self->{client}->close('backend_error'); - $self->close('error'); - return; - } - - if ($self->{state} eq "connecting" || - $self->{state} eq "verifying_backend") { - # then tell the service manager that this connection - # failed, so it can spawn a new one and note the dead host - $self->{reportto}->note_bad_backend_connect($self, 1); - } - - # close ourselves first - $self->close("error"); -} - -# Backend -sub event_hup { - my Perlbal::BackendHTTP $self = shift; - print "HANGUP for $self\n" if Perlbal::DEBUG; - $self->close("after_hup"); -} - -sub as_string { - my Perlbal::BackendHTTP $self = shift; - - my $ret = $self->SUPER::as_string; - my $name = $self->{sock} ? getsockname($self->{sock}) : undef; - my $lport = $name ? (Socket::sockaddr_in($name))[0] : undef; - $ret .= ": localport=$lport" if $lport; - if (my Perlbal::ClientProxy $cp = $self->{client}) { - $ret .= "; client=$cp->{fd}"; - } - $ret .= "; uses=$self->{use_count}; $self->{state}"; - if (defined $self->{service} && $self->{service}->{verify_backend}) { - $ret .= "; has_attention="; - $ret .= $self->{has_attention} ? 'yes' : 'no'; - } - - return $ret; -} - -sub die_gracefully { - # see if we need to die - my Perlbal::BackendHTTP $self = shift; - $self->close('graceful_death') if $self->state eq 'bored'; -} - -sub DESTROY { - Perlbal::objdtor($_[0]); - $_[0]->SUPER::DESTROY; -} - -1; - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/Cache.pm b/lib/mogdeps/Perlbal/Cache.pm deleted file mode 100644 index 8bff5b37..00000000 --- a/lib/mogdeps/Perlbal/Cache.pm +++ /dev/null @@ -1,244 +0,0 @@ -# (This is a copy of Cache::SimpleLRU.) -# License to use and redistribute this under the same terms as Perl itself. 
- -package Perlbal::Cache; - -use strict; -use fields qw(items size tail head maxsize); - -use vars qw($VERSION); -use constant PREVREF => 0; # ptr left, to newer item -use constant VALUE => 1; -use constant NEXTREF => 2; # ptr right, to older item -use constant KEY => 3; # copy of key for unlinking from namespace on fallout - -$VERSION = '1.0'; - -sub new { - my $class = shift; - my $self = fields::new($class); - my $args = @_ == 1 ? $_[0] : { @_ }; - - $self->{head} = undef, - $self->{tail} = undef, - $self->{items} = {}; # key -> arrayref, indexed by constants above - $self->{size} = 0; - $self->{maxsize} = $args->{maxsize}+0; - return $self; -} - -# need to DESTROY to cleanup doubly-linked list (circular refs) -sub DESTROY { - my $self = shift; - $self->set_maxsize(0); - $self->validate_list; -} - -# calls $code->($val) for each value in cache. $code must return true -# to continue walking. foreach returns true if you hit the end. -sub foreach { - my Perlbal::Cache $self = shift; - my $code = shift; - my $iter = $self->{head}; - while ($iter) { - my $val = $iter->[VALUE]; - $iter = $iter->[NEXTREF]; - last unless $code->($val); - } - return $iter ? 0 : 1; -} - -sub size { - my Perlbal::Cache $self = shift; - return $self->{size}; -} - -sub maxsize { - my Perlbal::Cache $self = shift; - return $self->{maxsize}; -} - -sub set_maxsize { - my ($self, $maxsize) = @_; - $self->{maxsize} = $maxsize; - $self->drop_tail while - $self->{size} > $self->{maxsize}; -} - -# For debugging only -sub validate_list { - my ($self) = @_; - - die "no tail pointer\n" if $self->{size} && ! $self->{tail}; - die "no head pointer\n" if $self->{size} && ! $self->{head}; - die "unwanted tail pointer\n" if ! $self->{size} && $self->{tail}; - die "unwanted head pointer\n" if ! $self->{size} && $self->{head}; - - my $iter = $self->{head}; - my $last = undef; - my $count = 1; - while ($count <= $self->{size}) { - if (! 
defined $iter) { - die "undefined iterator on element \#$count (trying to get to size $self->{size})\n"; - } - my $key = $iter->[KEY]; - my $it_via_hash = $self->{items}->{$key} or - die "item '$key' found in list, but not in hash\n"; - - unless ($it_via_hash == $iter) { - die "Hash value of '$key' maps to different node than we found.\n"; - } - - if ($count == 1 && $iter->[PREVREF]) { - die "Head element shouldn't have previous pointer!\n"; - } - if ($count == $self->{size} && $iter->[NEXTREF]) { - die "Last element shouldn't have next pointer!\n"; - } - if ($iter->[NEXTREF] && $iter->[NEXTREF]->[PREVREF] != $iter) { - die "next's previous should be us.\n"; - } - if ($last && $iter->[PREVREF] != $last) { - die "defined \$last but its previous isn't us.\n"; - } - if ($last && $last->[NEXTREF] != $iter) { - die "defined \$last but our next isn't it\n"; - } - if (!$last && $iter->[PREVREF]) { - die "uh, we have a nextref but shouldn't\n"; - } - - $last = $iter; - $iter = $iter->[NEXTREF]; - $count++; - } - return 1; -} - -sub drop_tail { - my Perlbal::Cache $self = shift; - die "no tail (size)" unless $self->{size}; - - ## who's going to die? - my $to_die = $self->{tail} or die "no tail (key)"; - - ## set the tail to the item before the one dying. - $self->{tail} = $self->{tail}->[PREVREF]; - - ## adjust the forward pointer on the tail to be undef - if (defined $self->{tail}) { - $self->{tail}->[NEXTREF] = undef; - } - - ## kill the item - delete $self->{items}->{$to_die->[KEY]}; - - ## shrink the overall size - $self->{size}--; - - if (!$self->{size}) { - $self->{head} = undef; - } -} - -sub get { - my Perlbal::Cache $self = shift; - my ($key) = @_; - - my $item = $self->{items}{$key} or - return undef; - - # promote this to the head - unless ($self->{head} == $item) { - if ($self->{tail} == $item) { - $self->{tail} = $item->[PREVREF]; - } - - # remove this element from the linked list. 
- my $next = $item->[NEXTREF]; - my $prev = $item->[PREVREF]; - if ($next) { $next->[PREVREF] = $prev; } - if ($prev) { $prev->[NEXTREF] = $next; } - - # make current head point backwards to this item - $self->{head}->[PREVREF] = $item; - - # make this item point forwards to current head, and backwards nowhere - $item->[NEXTREF] = $self->{head}; - $item->[PREVREF] = undef; - - # make this the new head - $self->{head} = $item; - } - - return $item->[VALUE]; -} - -sub remove { - my Perlbal::Cache $self = shift; - my ($key) = @_; - - my $item = $self->{items}{$key} or - return 0; - delete $self->{items}{$key}; - $self->{size}--; - - if (!$self->{size}) { - $self->{head} = undef; - $self->{tail} = undef; - return 1; - } - - if ($self->{head} == $item) { - $self->{head} = $item->[NEXTREF]; - $self->{head}->[PREVREF] = undef; - return 1; - } - if ($self->{tail} == $item) { - $self->{tail} = $item->[PREVREF]; - $self->{tail}->[NEXTREF] = undef; - return 1; - } - - # remove from middle - $item->[PREVREF]->[NEXTREF] = $item->[NEXTREF]; - $item->[NEXTREF]->[PREVREF] = $item->[PREVREF]; - return 1; - -} - -sub set { - my Perlbal::Cache $self = shift; - my ($key, $value) = @_; - - $self->drop_tail while - $self->{maxsize} && - $self->{size} >= $self->{maxsize} && - ! 
exists $self->{items}->{$key}; - - if (exists $self->{items}->{$key}) { - # update the value - my $it = $self->{items}->{$key}; - $it->[VALUE] = $value; - } else { - # stick it at the end, for now - my $it = $self->{items}->{$key} = []; - $it->[PREVREF] = undef; - $it->[NEXTREF] = undef; - $it->[KEY] = $key; - $it->[VALUE] = $value; - if ($self->{size}) { - $self->{tail}->[NEXTREF] = $it; - $it->[PREVREF] = $self->{tail}; - } else { - $self->{head} = $it; - } - $self->{tail} = $it; - $self->{size}++; - } - - # this will promote it to the top: - $self->get($key); -} - -1; diff --git a/lib/mogdeps/Perlbal/ChunkedUploadState.pm b/lib/mogdeps/Perlbal/ChunkedUploadState.pm deleted file mode 100644 index be1ec8a8..00000000 --- a/lib/mogdeps/Perlbal/ChunkedUploadState.pm +++ /dev/null @@ -1,64 +0,0 @@ -package Perlbal::ChunkedUploadState; -use strict; - -sub new { - my ($pkg, %args) = @_; - my $self = bless { - 'buf' => '', - 'bytes_remain' => 0, # remaining in chunk (ignoring final 2 byte CRLF) - }, $pkg; - foreach my $k (qw(on_new_chunk on_disconnect on_zero_chunk)) { - $self->{$k} = (delete $args{$k}) || sub {}; - } - die "bogus args" if %args; - return $self; -} - -sub on_readable { - my ($self, $ds) = @_; - my $rbuf = $ds->read(131072); - unless (defined $rbuf) { - $self->{on_disconnect}->(); - return; - } - - $self->{buf} .= $$rbuf; - - while ($self->drive_machine) {} -} - -# returns 1 if progress was made parsing buffer -sub drive_machine { - my $self = shift; - - my $buflen = length($self->{buf}); - return 0 unless $buflen; - - if (my $br = $self->{bytes_remain}) { - my $extract = $buflen > $br ? 
$br : $buflen; - my $ch = substr($self->{buf}, 0, $extract, ''); - $self->{bytes_remain} -= $extract; - die "assert" if $self->{bytes_remain} < 0; - $self->{on_new_chunk}->(\$ch); - return 1; - } - - return 0 unless $self->{buf} =~ s/^(?:\r\n)?([0-9a-fA-F]+)(?:;.*)?\r\n//; - $self->{bytes_remain} = hex($1); - - if ($self->{bytes_remain} == 0) { - # FIXME: new state machine state for trailer parsing/discarding. - # (before we do on_zero_chunk). for now, though, just assume - # no trailers and throw away the extra post-trailer \r\n that - # is probably in this packet. hacky. - $self->{buf} =~ s/^\r\n//; - $self->{hit_zero} = 1; - $self->{on_zero_chunk}->(); - return 0; - } - return 1; -} - -sub hit_zero_chunk { $_[0]{hit_zero} } - -1; diff --git a/lib/mogdeps/Perlbal/ClientHTTP.pm b/lib/mogdeps/Perlbal/ClientHTTP.pm deleted file mode 100644 index c4919a2d..00000000 --- a/lib/mogdeps/Perlbal/ClientHTTP.pm +++ /dev/null @@ -1,475 +0,0 @@ -###################################################################### -# HTTP Connection from a reverse proxy client. GET/HEAD only. -# most functionality is implemented in the base class. -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. 
-# - -package Perlbal::ClientHTTP; -use strict; -use warnings; -no warnings qw(deprecated); - -use base "Perlbal::ClientHTTPBase"; -use Perlbal::Util; - -use fields ('put_in_progress', # 1 when we're currently waiting for an async job to return - 'put_fh', # file handle to use for writing data - 'put_fh_filename', # filename of put_fh - 'put_pos', # file offset to write next data at - - 'content_length', # length of document being transferred - 'content_length_remain', # bytes remaining to be read - 'chunked_upload_state', # bool/obj: if processing a chunked upload, Perlbal::ChunkedUploadState object, else undef - ); - -use HTTP::Date (); -use File::Path; - -use Errno qw( EPIPE ); -use POSIX qw( O_CREAT O_TRUNC O_WRONLY O_RDONLY ENOENT ); - -# class list of directories we know exist -our (%VerifiedDirs); - -sub new { - my $class = shift; - - my $self = fields::new($class); - $self->SUPER::new(@_); - $self->init; - return $self; -} - -# upcasting a generic ClientHTTPBase (from a service selector) to a -# "full-fledged" ClientHTTP. -sub new_from_base { - my $class = shift; - my Perlbal::ClientHTTPBase $cb = shift; # base object - Perlbal::Util::rebless($cb, $class); - $cb->init; - - $cb->watch_read(1); # enable our reads, so we can get PUT/POST data - $cb->handle_request; # this will disable reads, if GET/HEAD/etc - return $cb; -} - -sub init { - my Perlbal::ClientHTTP $self = shift; - $self->{put_in_progress} = 0; - $self->{put_fh} = undef; - $self->{put_pos} = 0; - $self->{chunked_upload_state} = undef; -} - -sub close { - my Perlbal::ClientHTTP $self = shift; - - # don't close twice - return if $self->{closed}; - - $self->{put_fh} = undef; - $self->SUPER::close(@_); -} - -sub event_read { - my Perlbal::ClientHTTP $self = shift; - $self->{alive_time} = $Perlbal::tick_time; - - # see if we have headers? 
- if ($self->{req_headers}) { - if ($self->{req_headers}->request_method eq 'PUT') { - $self->event_read_put; - } else { - # since we have headers and we're not doing any special - # handling above, let's just disable read notification, because - # we won't do anything with the data - $self->watch_read(0); - } - return; - } - - # try and get the headers, if they're all here - my $hd = $self->read_request_headers - or return; - - $self->handle_request; -} - -# one-time routing of new request to the right handlers -sub handle_request { - my Perlbal::ClientHTTP $self = shift; - my $hd = $self->{req_headers}; - - $self->check_req_headers; - - # fully formed request received - $self->{requests}++; - - # notify that we're about to serve - return if $self->{service}->run_hook('start_web_request', $self); - return if $self->{service}->run_hook('start_http_request', $self); - - # GET/HEAD requests (local, from disk) - if ($hd->request_method eq 'GET' || $hd->request_method eq 'HEAD') { - # and once we have it, start serving - $self->watch_read(0); - return $self->_serve_request($hd); - } - - # PUT requests - return $self->handle_put if $hd->request_method eq 'PUT'; - - # DELETE requests - return $self->handle_delete if $hd->request_method eq 'DELETE'; - - # else, bad request - return $self->send_response(400); -} - -sub handle_put { - my Perlbal::ClientHTTP $self = shift; - my $hd = $self->{req_headers}; - - return $self->send_response(403) unless $self->{service}->{enable_put}; - - return if $self->handle_put_chunked; - - # they want to put something, so let's setup and wait for more reads - my $clen = - $self->{content_length} = - $self->{content_length_remain} = - $hd->header('Content-length') + 0; - - # return a 400 (bad request) if we got no content length or if it's - # bigger than any specified max put size - return $self->send_response(400, "Content-length of $clen is invalid.") - if ! 
defined($clen) || - $clen < 0 || - ($self->{service}->{max_put_size} && - $clen > $self->{service}->{max_put_size}); - - # if we are supposed to read data and have some data already from a header over-read, note it - if ($clen && defined $self->{read_ahead} && $self->{read_ahead} > 0) { - $self->{content_length_remain} -= $self->{read_ahead}; - } - - return if $self->{service}->run_hook('handle_put', $self); - - # error in filename? (any .. is an error) - my $uri = $self->{req_headers}->request_uri; - return $self->send_response(400, 'Invalid filename') - if $uri =~ /\.\./; - - # now we want to get the URI - return $self->send_response(400, 'Invalid filename') - unless $uri =~ m!^ - ((?:/[\w\-\.]+)*) # $1: zero+ path components of /FOO where foo is - # one+ conservative characters - / # path separator - ([\w\-\.]+) # $2: and the filename, one+ conservative characters - $!x; - - # sanitize uri into path and file into a disk path and filename - my ($path, $filename) = ($1 || '', $2); - - # the final action we'll be taking, eventually, is to start an async - # file open of the requested disk path. but we might need to verify - # the min_put_directory first. - my $start_open = sub { - my $disk_path = $self->{service}->{docroot} . '/' . $path; - $self->start_put_open($disk_path, $filename); - }; - - # verify minput if necessary - if ($self->{service}->{min_put_directory}) { - my @elems = grep { defined $_ && length $_ } split '/', $path; - return $self->send_response(400, 'Does not meet minimum directory requirement') - unless scalar(@elems) >= $self->{service}->{min_put_directory}; - my $req_path = '/' . join('/', splice(@elems, 0, $self->{service}->{min_put_directory})); - my $extra_path = '/' . 
join('/', @elems); - $self->validate_min_put_directory($req_path, $extra_path, $filename, $start_open); - } else { - $start_open->(); - } - - return; -} - -sub handle_put_chunked { - my Perlbal::ClientHTTP $self = shift; - my $req_hd = $self->{req_headers}; - my $te = $req_hd->header("Transfer-Encoding"); - return unless $te && $te eq "chunked"; - - my $eh = $req_hd->header("Expect"); - if ($eh && $eh =~ /\b100-continue\b/) { - $self->write(\ "HTTP/1.1 100 Continue\r\n\r\n"); - } - - my $max_size = $self->{service}{max_chunked_request_size}; - - # error in filename? (any .. is an error) - my $uri = $self->{req_headers}->request_uri; - return $self->send_response(400, 'Invalid filename') - if $uri =~ /\.\./; - - # now we want to get the URI - return $self->send_response(400, 'Invalid filename') - unless $uri =~ m!^ - ((?:/[\w\-\.]+)*) # $1: zero+ path components of /FOO where foo is - # one+ conservative characters - / # path separator - ([\w\-\.]+) # $2: and the filename, one+ conservative characters - $!x; - - # sanitize uri into path and file into a disk path and filename - my ($path, $filename) = ($1 || '', $2); - - my $disk_path = $self->{service}->{docroot} . '/' . $path; - - $self->{chunked_upload_state} = Perlbal::ChunkedUploadState->new(%{{ - on_new_chunk => sub { - my $cref = shift; - my $len = length($$cref); - push @{$self->{read_buf}}, $cref; - - $self->{read_ahead} += $len; - $self->{content_length} += $len; - - # if too large, disconnect them... - if ($max_size && $self->{content_length} > $max_size) { - # TODO: delete file at this point? we're disconnecting them - # to prevent them from writing more, but do we care to keep - # what they already wrote? - $self->close; - return; - } - - $self->put_writeout if $self->{read_ahead} >= 8192; # arbitrary - }, - on_disconnect => sub { - warn "Disconnect during chunked PUT.\n"; - - # TODO: do we unlink the file here, since it wasn't a proper close - # ending in a zero-length chunk? perhaps a config option? 
for - # now we'll just leave it on disk with what we've got so far: - $self->close('remote_closure_during_chunked_put'); - }, - on_zero_chunk => sub { - $self->{chunked_upload_state} = undef; - $self->watch_read(0); - - # kick off any necessary aio writes: - $self->put_writeout; - # this will do nothing, if a put is already in progress: - $self->put_close; - }, - }}); - - $self->start_put_open($disk_path, $filename); - - return 1; -} - -# called when we're requested to do a delete -sub handle_delete { - my Perlbal::ClientHTTP $self = shift; - - return $self->send_response(403) unless $self->{service}->{enable_delete}; - - $self->watch_read(0); - - # error in filename? (any .. is an error) - my $uri = $self->{req_headers}->request_uri; - return $self->send_response(400, 'Invalid filename') - if $uri =~ /\.\./; - - # now we want to get the URI - if ($uri =~ m!^(?:/[\w\-\.]+)+$!) { - # now attempt the unlink - Perlbal::AIO::aio_unlink($self->{service}->{docroot} . '/' . $uri, sub { - my $err = shift; - if ($err == 0 && !$!) { - # delete was successful - return $self->send_response(204); - } elsif ($! == ENOENT) { - # no such file - return $self->send_response(404); - } else { - # failure... 
- return $self->send_response(400, "$!"); - } - }); - } else { - # bad URI, don't accept the delete - return $self->send_response(400, 'Invalid filename'); - } -} - -sub event_read_put { - my Perlbal::ClientHTTP $self = shift; - - if (my $cus = $self->{chunked_upload_state}) { - $cus->on_readable($self); - return; - } - - # read in data and shove it on the read buffer - my $dataref = $self->read($self->{content_length_remain}); - - # unless they disconnected prematurely - unless (defined $dataref) { - $self->close('remote_closure'); - return; - } - - # got some data - push @{$self->{read_buf}}, $dataref; - my $clen = length($$dataref); - $self->{read_size} += $clen; - $self->{read_ahead} += $clen; - $self->{content_length_remain} -= $clen; - - if ($self->{content_length_remain}) { - $self->put_writeout if $self->{read_ahead} >= 8192; # arbitrary - } else { - # now, if we've filled the content of this put, we're done - $self->watch_read(0); - $self->put_writeout; - } -} - -# verify that a minimum put directory exists. if/when it's verified, -# perhaps cached, the provided callback will be run. -sub validate_min_put_directory { - my Perlbal::ClientHTTP $self = shift; - my ($req_path, $extra_path, $filename, $callback) = @_; - - my $disk_dir = $self->{service}->{docroot} . '/' . 
$req_path; - return $callback->() if $VerifiedDirs{$disk_dir}; - - $self->{put_in_progress} = 1; - - Perlbal::AIO::aio_open($disk_dir, O_RDONLY, 0755, sub { - my $fh = shift; - $self->{put_in_progress} = 0; - - # if error return failure - return $self->send_response(404, "Base directory does not exist") unless $fh; - CORE::close($fh); - - # mindir existed, mark it as so and start the open for the rest of the path - $VerifiedDirs{$disk_dir} = 1; - $callback->(); - }); -} - -# attempt to open a file being PUT for writing to disk -sub start_put_open { - my Perlbal::ClientHTTP $self = shift; - my ($path, $file) = @_; - - $self->{put_in_progress} = 1; - - Perlbal::AIO::aio_open("$path/$file", O_CREAT | O_TRUNC | O_WRONLY, 0644, sub { - # get the fd - my $fh = shift; - - # verify file was opened - $self->{put_in_progress} = 0; - - if (! $fh) { - if ($! == ENOENT) { - # directory doesn't exist, so let's manually create it - eval { File::Path::mkpath($path, 0, 0755); }; - return $self->system_error("Unable to create directory", "path = $path, file = $file") if $@; - - # should be created, call self recursively to try - return $self->start_put_open($path, $file); - } else { - return $self->system_error("Internal error", "error = $!, path = $path, file = $file"); - } - } - - $self->{put_fh} = $fh; - $self->{put_pos} = 0; - $self->{put_fh_filename} = "$path/$file"; - - # We just opened the file, haven't read_ahead any bytes, are expecting 0 bytes for read and we're - # not in chunked mode, so close the file immediately, we're done. 
- unless ($self->{read_ahead} || $self->{content_length_remain} || $self->{chunked_upload_state}) { - # FIXME this should be done through AIO - $self->put_close; - return; - } - - $self->put_writeout; - }); -} - -# called when we've got some put data to write out -sub put_writeout { - my Perlbal::ClientHTTP $self = shift; - Carp::confess("wrong class for $self") unless ref $self eq "Perlbal::ClientHTTP"; - - return if $self->{service}->run_hook('put_writeout', $self); - return if $self->{put_in_progress}; - return unless $self->{put_fh}; - return unless $self->{read_ahead}; - - my $data = join("", map { $$_ } @{$self->{read_buf}}); - my $count = length $data; - - # reset our input buffer - $self->{read_buf} = []; - $self->{read_ahead} = 0; - - # okay, file is open, write some data - $self->{put_in_progress} = 1; - - Perlbal::AIO::set_file_for_channel($self->{put_fh_filename}); - Perlbal::AIO::aio_write($self->{put_fh}, $self->{put_pos}, $count, $data, sub { - return if $self->{closed}; - - # see how many bytes written - my $bytes = shift() + 0; - - $self->{put_pos} += $bytes; - $self->{put_in_progress} = 0; - - # now recursively call ourselves? - if ($self->{read_ahead}) { - $self->put_writeout; - return; - } - - return if $self->{content_length_remain} || $self->{chunked_upload_state}; - - # we're done putting this file, so close it. 
- # FIXME this should be done through AIO - $self->put_close; - }); -} - -sub put_close { - my Perlbal::ClientHTTP $self = shift; - return if $self->{put_in_progress}; - return unless $self->{put_fh}; - - if (CORE::close($self->{put_fh})) { - $self->{put_fh} = undef; - return $self->send_response(200); - } else { - return $self->system_error("Error saving file", "error in close: $!"); - } -} - -1; - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/ClientHTTPBase.pm b/lib/mogdeps/Perlbal/ClientHTTPBase.pm deleted file mode 100644 index 80d64cfc..00000000 --- a/lib/mogdeps/Perlbal/ClientHTTPBase.pm +++ /dev/null @@ -1,905 +0,0 @@ -###################################################################### -# Common HTTP functionality for ClientProxy and ClientHTTP -# possible states: -# reading_headers (initial state, then follows one of two paths) -# wait_backend, backend_req_sent, wait_res, xfer_res, draining_res -# wait_stat, wait_open, xfer_disk -# both paths can then go into persist_wait, which means they're waiting -# for another request from the user -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. - -package Perlbal::ClientHTTPBase; -use strict; -use warnings; -no warnings qw(deprecated); - -use Sys::Syscall; -use base "Perlbal::Socket"; -use HTTP::Date (); -use fields ('service', # Perlbal::Service object - 'replacement_uri', # URI to send instead of the one requested; this is used - # to instruct _serve_request to send an index file instead - # of trying to serve a directory and failing - 'scratch', # extra storage; plugins can use it if they want - - # reproxy support - 'reproxy_file', # filename the backend told us to start opening - 'reproxy_file_size', # size of file, once we stat() it - 'reproxy_fh', # if needed, IO::Handle of fd - 'reproxy_file_offset', # how much we've sent from the file. 
- - 'post_sendfile_cb', # subref to run after we're done sendfile'ing the current file - - 'requests', # number of requests this object has performed for the user - - # service selector parent - 'selector_svc', # the original service from which we came - ); - -use Fcntl ':mode'; -use Errno qw(EPIPE ECONNRESET); -use POSIX (); - -# hard-code defaults can be changed with MIME management command -our $MimeType = {qw( - css text/css - doc application/msword - gif image/gif - htm text/html - html text/html - jpg image/jpeg - js application/x-javascript - mp3 audio/mpeg - mpg video/mpeg - pdf application/pdf - png image/png - tif image/tiff - tiff image/tiff - torrent application/x-bittorrent - txt text/plain - zip application/zip -)}; - -# ClientHTTPBase -sub new { - - my Perlbal::ClientHTTPBase $self = shift; - my ($service, $sock, $selector_svc) = @_; - $self = fields::new($self) unless ref $self; - $self->SUPER::new($sock); # init base fields - - $self->{service} = $service; - $self->{replacement_uri} = undef; - $self->{headers_string} = ''; - $self->{requests} = 0; - $self->{scratch} = {}; - $self->{selector_svc} = $selector_svc; - - $self->state('reading_headers'); - - $self->watch_read(1); - return $self; -} - -sub close { - my Perlbal::ClientHTTPBase $self = shift; - - # don't close twice - return if $self->{closed}; - - # could contain a closure with circular reference - $self->{post_sendfile_cb} = undef; - - # close the file we were reproxying, if any - CORE::close($self->{reproxy_fh}) if $self->{reproxy_fh}; - - # now pass up the line - $self->SUPER::close(@_); -} - -# given the response headers we just got, and considering our request -# headers, determine if we should be sending keep-alive header -# information back to the client -sub setup_keepalive { - my Perlbal::ClientHTTPBase $self = $_[0]; - print "ClientHTTPBase::setup_keepalive($self)\n" if Perlbal::DEBUG >= 2; - - # now get the headers we're using - my Perlbal::HTTPHeaders $reshd = $_[1]; - my 
Perlbal::HTTPHeaders $rqhd = $self->{req_headers}; - - # for now, we enforce outgoing HTTP 1.0 - $reshd->set_version("1.0"); - - # if we came in via a selector service, that's whose settings - # we respect for persist_client - my $svc = $self->{selector_svc} || $self->{service}; - my $persist_client = $svc->{persist_client} || 0; - print " service's persist_client = $persist_client\n" if Perlbal::DEBUG >= 3; - - # do keep alive if they sent content-length or it's a head request - my $do_keepalive = $persist_client && $rqhd->req_keep_alive($reshd); - if ($do_keepalive) { - print " doing keep-alive to client\n" if Perlbal::DEBUG >= 3; - my $timeout = $self->{service}->{persist_client_timeout}; - $reshd->header('Connection', 'keep-alive'); - $reshd->header('Keep-Alive', $timeout ? "timeout=$timeout, max=100" : undef); - } else { - print " doing connection: close\n" if Perlbal::DEBUG >= 3; - # FIXME: we don't necessarily want to set connection to close, - # but really set a space-separated list of tokens which are - # specific to the connection. "close" and "keep-alive" are - # just special. - $reshd->header('Connection', 'close'); - $reshd->header('Keep-Alive', undef); - } -} - -# overridden here from Perlbal::Socket to use the service value -sub max_idle_time { - return $_[0]->{service}->{persist_client_timeout}; -} - -# Called when this client is entering a persist_wait state, but before we are returned to base. -sub persist_wait { - -} - -# called when we've finished writing everything to a client and we need -# to reset our state for another request. returns 1 to mean that we should -# support persistence, 0 means we're discarding this connection. -sub http_response_sent { - my Perlbal::ClientHTTPBase $self = $_[0]; - - # close if we're supposed to - if ( - ! defined $self->{res_headers} || - ! $self->{res_headers}->res_keep_alive($self->{req_headers}) || - $self->{do_die} - ) - { - # do a final read so we don't have unread_data_waiting and RST - # the connection. 
IE and others send an extra \r\n after POSTs - my $dummy = $self->read(5); - - # close if we have no response headers or they say to close - $self->close("no_keep_alive"); - return 0; - } - - # if they just did a POST, set the flag that says we might expect - # an unadvertised \r\n coming from some browsers. Old Netscape - # 4.x did this on all POSTs, and Firefox/Safari do it on - # XmlHttpRequest POSTs. - if ($self->{req_headers}->request_method eq "POST") { - $self->{ditch_leading_rn} = 1; - } - - # now since we're doing persistence, uncork so the last packet goes. - # we will recork when we're processing a new request. - $self->tcp_cork(0); - - # reset state - $self->{replacement_uri} = undef; - $self->{headers_string} = ''; - $self->{req_headers} = undef; - $self->{res_headers} = undef; - $self->{reproxy_fh} = undef; - $self->{reproxy_file} = undef; - $self->{reproxy_file_size} = 0; - $self->{reproxy_file_offset} = 0; - $self->{read_buf} = []; - $self->{read_ahead} = 0; - $self->{read_size} = 0; - $self->{scratch} = {}; - $self->{post_sendfile_cb} = undef; - $self->state('persist_wait'); - - $self->persist_wait; - - if (my $selector_svc = $self->{selector_svc}) { - if (! $selector_svc->run_hook('return_to_base', $self)){ - $selector_svc->return_to_base($self); - } - } - - # NOTE: because we only speak 1.0 to clients they can't have - # pipeline in a read that we haven't read yet. 
- $self->watch_read(1); - $self->watch_write(0); - return 1; -} - -sub reproxy_fh { - my Perlbal::ClientHTTPBase $self = shift; - - # setter - if (@_) { - my ($fh, $size) = @_; - $self->state('xfer_disk'); - $self->{reproxy_fh} = $fh; - $self->{reproxy_file_offset} = 0; - $self->{reproxy_file_size} = $size; - - my $is_ssl_webserver = ( $self->{service}->{listener}->{sslopts} && - ( $self->{service}->{role} eq 'web_server') ); - - unless ($is_ssl_webserver) { - # call hook that we're reproxying a file - return $fh if $self->{service}->run_hook("start_send_file", $self); - # turn on writes (the hook might not have wanted us to) - $self->watch_write(1); - return $fh; - } else { # use aio_read for ssl webserver instead of sendfile - - print "webserver in ssl mode, sendfile disabled!\n" - if $Perlbal::DEBUG >= 3; - - # turn off writes - $self->watch_write(0); - #create filehandle for reading - my $data = ''; - Perlbal::AIO::aio_read($self->reproxy_fh, 0, 2048, $data, sub { - # got data? undef is error - return $self->_simple_response(500) unless $_[0] > 0; - - # seek into the file now so sendfile starts further in - my $ld = length $data; - sysseek($self->{reproxy_fh}, $ld, &POSIX::SEEK_SET); - $self->{reproxy_file_offset} = $ld; - # reenable writes after we get data - $self->tcp_cork(1); # by setting reproxy_file_offset above, - # it won't cork, so we cork it - $self->write($data); - $self->watch_write(1); - }); - return 1; - } - } - - return $self->{reproxy_fh}; -} - -sub event_read { - my Perlbal::ClientHTTPBase $self = shift; - - $self->{alive_time} = $Perlbal::tick_time; - - # see if we have headers? - die "Shouldn't get here! This is an abstract base class, pretty much, except in the case of the 'selector' role." 
- if $self->{req_headers}; - - my $hd = $self->read_request_headers; - return unless $hd; - - return if $self->{service}->run_hook('start_http_request', $self); - - # we must stop watching for events now, otherwise if there's - # PUT/POST overflow, it'll be sent to ClientHTTPBase, which can't - # handle it yet. must wait for the selector (which has as much - # time as it wants) to route as to our subclass, which can then - # re-enable reads. - $self->watch_read(0); - - my $select = sub { - # now that we have headers, it's time to tell the selector - # plugin that it's time for it to select which real service to - # use - my $selector = $self->{'service'}->selector(); - return $self->_simple_response(500, "No service selector configured.") - unless ref $selector eq "CODE"; - $selector->($self); - }; - - my $svc = $self->{'service'}; - if ($svc->{latency}) { - Danga::Socket->AddTimer($svc->{latency} / 1000, $select); - } else { - $select->(); - } -} - -sub reproxy_file_done { - my Perlbal::ClientHTTPBase $self = shift; - return if $self->{service}->run_hook('reproxy_fh_finished', $self); - # close the sendfile fd - CORE::close($self->{reproxy_fh}); - $self->{reproxy_fh} = undef; - if (my $cb = $self->{post_sendfile_cb}) { - $cb->(); - } else { - $self->http_response_sent; - } -} - -# client is ready for more of its file. so sendfile some more to it. 
-# (called by event_write when we're actually in this mode) -sub event_write_reproxy_fh { - my Perlbal::ClientHTTPBase $self = shift; - - my $remain = $self->{reproxy_file_size} - $self->{reproxy_file_offset}; - $self->tcp_cork(1) if $self->{reproxy_file_offset} == 0; - $self->watch_write(0); - - if ($self->{service}->{listener}->{sslopts}) { # SSL (sendfile does not do SSL) - return if $self->{closed}; - if ($remain <= 0) { #done - print "REPROXY SSL done\n" if Perlbal::DEBUG >= 2; - $self->reproxy_file_done; - return; - } - # queue up next read - Perlbal::AIO::set_file_for_channel($self->{reproxy_file}); - my $len = $remain > 4096 ? 4096 : $remain; # buffer size - my $buffer = ''; - Perlbal::AIO::aio_read( - $self->{reproxy_fh}, - $self->{reproxy_file_offset}, - $len, - $buffer, - sub { - return if $self->{closed}; - # we have buffer to send - my $rv = $_[0]; # arg is result of sysread - if (!defined($rv) || $rv <= 0) { # read error - # sysseek is called after sysread so $! not valid - $self->close('sysread_error'); - print STDERR "Error w/ reproxy sysread\n"; - return; - } - $self->{reproxy_file_offset} += $rv; - $self->tcp_cork(1); # by setting reproxy_file_offset above, - # it won't cork, so we cork it - $self->write($buffer); # start socket send - $self->watch_write(1); - } - ); - return; - } - - # cap at 128k sendfiles - my $to_send = $remain > 128 * 1024 ? 128 * 1024 : $remain; - - my $postread = sub { - return if $self->{closed}; - - my $sent = Perlbal::Socket::sendfile($self->{fd}, - fileno($self->{reproxy_fh}), - $to_send); - #warn "to_send = $to_send, sent = $sent\n"; - print "REPROXY Sent: $sent\n" if Perlbal::DEBUG >= 2; - - if ($sent < 0) { - return $self->close("epipe") if $! == EPIPE; - return $self->close("connreset") if $! 
== ECONNRESET; - print STDERR "Error w/ sendfile: $!\n"; - $self->close('sendfile_error'); - return; - } - $self->{reproxy_file_offset} += $sent; - - if ($sent >= $remain) { - $self->reproxy_file_done; - } else { - $self->watch_write(1); - } - }; - - # TODO: way to bypass readahead and go straight to sendfile for common/hot/recent files. - # something like: - # if ($hot) { $postread->(); return ; } - - if ($to_send < 0) { - Perlbal::log('warning', "tried to readahead negative bytes. filesize=$self->{reproxy_file_size}, offset=$self->{reproxy_file_offset}"); - # this code, doing sendfile, will fail gracefully with return - # code, not 'die', and we'll close with sendfile_error: - $postread->(); - return; - } - - Perlbal::AIO::set_file_for_channel($self->{reproxy_file}); - Perlbal::AIO::aio_readahead($self->{reproxy_fh}, - $self->{reproxy_file_offset}, - $to_send, $postread); -} - -sub event_write { - my Perlbal::ClientHTTPBase $self = shift; - - # Any HTTP client is considered alive if it's writable. - # if it's not writable for 30 seconds, we kill it. - # subclasses can decide what's appropriate for timeout. - $self->{alive_time} = $Perlbal::tick_time; - - # if we're sending a filehandle, go do some more sendfile: - if ($self->{reproxy_fh}) { - $self->event_write_reproxy_fh; - return; - } - - # otherwise just kick-start our write buffer. - if ($self->write(undef)) { - # we've written all data in the queue, so stop waiting for - # write notifications: - print "All writing done to $self\n" if Perlbal::DEBUG >= 2; - $self->watch_write(0); - } -} - -# this gets called when a "web" service is serving a file locally. 
-sub _serve_request { - my Perlbal::ClientHTTPBase $self = shift; - my Perlbal::HTTPHeaders $hd = shift; - - my $rm = $hd->request_method; - unless ($rm eq "HEAD" || $rm eq "GET") { - return $self->_simple_response(403, "Unimplemented method"); - } - - my $uri = Perlbal::Util::durl($self->{replacement_uri} || $hd->request_uri); - my Perlbal::Service $svc = $self->{service}; - - # start_serve_request hook - return 1 if $self->{service}->run_hook('start_serve_request', $self, \$uri); - - # don't allow directory traversal - if ($uri =~ m!/\.\./! || $uri !~ m!^/!) { - return $self->_simple_response(403, "Bogus URL"); - } - - # double question mark means to serve multiple files, comma separated after the - # questions. the uri part before the question mark is the relative base directory - # TODO: only do this if $uri has ?? and the service also allows it. otherwise - # we don't want to mess with anybody's meaning of '??' on the backend service - return $self->_serve_request_multiple($hd, $uri) if $uri =~ /\?\?/; - - # chop off the query string - $uri =~ s/\?.*//; - - return $self->_simple_response(500, "Docroot unconfigured") - unless $svc->{docroot}; - - my $file = $svc->{docroot} . $uri; - - # update state, since we're now waiting on stat - $self->state('wait_stat'); - - Perlbal::AIO::aio_stat($file, sub { - # client's gone anyway - return if $self->{closed}; - unless (-e _) { - return if $self->{service}->run_hook('static_get_poststat_file_missing', $self); - return $self->_simple_response(404); - } - - my $mtime = (stat(_))[9]; - my $lastmod = HTTP::Date::time2str($mtime); - my $ims = $hd->header("If-Modified-Since") || ""; - - # IE sends a request header like "If-Modified-Since: ; length=" - # so we have to remove the length bit before comparing it with our date. - # then we save the length to compare later. 
- my $ims_len; - if ($ims && $ims =~ s/; length=(\d+)//) { - $ims_len = $1; - } - - my $not_mod = $ims eq $lastmod && -f _; - - my $res; - my $not_satisfiable = 0; - my $size = -s _ if -f _; - - # extra protection for IE, since it's offering the info anyway. (see above) - $not_mod = 0 if $ims_len && $ims_len != $size; - - my ($status, $range_start, $range_end) = $hd->range($size); - - if ($not_mod) { - $res = $self->{res_headers} = Perlbal::HTTPHeaders->new_response(304); - } elsif ($status == 416) { - $res = $self->{res_headers} = Perlbal::HTTPHeaders->new_response(416); - $res->header("Content-Range", $size ? "bytes */$size" : "*"); - $res->header("Content-Length", 0); - $not_satisfiable = 1; - } elsif ($status == 206) { - # partial content - $res = $self->{res_headers} = Perlbal::HTTPHeaders->new_response(206); - } else { - return if $self->{service}->run_hook('static_get_poststat_pre_send', $self, $mtime); - $res = $self->{res_headers} = Perlbal::HTTPHeaders->new_response(200); - } - - # now set whether this is keep-alive or not - $res->header("Date", HTTP::Date::time2str()); - $res->header("Server", "Perlbal"); - $res->header("Last-Modified", $lastmod); - - if (-f _) { - # advertise that we support byte range requests - $res->header("Accept-Ranges", "bytes"); - - unless ($not_mod || $not_satisfiable) { - my ($ext) = ($file =~ /\.(\w+)$/); - $res->header("Content-Type", - (defined $ext && exists $MimeType->{$ext}) ? 
$MimeType->{$ext} : "text/plain"); - - unless ($status == 206) { - $res->header("Content-Length", $size); - } else { - $res->header("Content-Range", "bytes $range_start-$range_end/$size"); - $res->header("Content-Length", $range_end - $range_start + 1); - } - } - - # has to happen after content-length is set to work: - $self->setup_keepalive($res); - - return if $self->{service}->run_hook('modify_response_headers', $self); - - if ($rm eq "HEAD" || $not_mod || $not_satisfiable) { - # we can return already, since we know the size - $self->tcp_cork(1); - $self->state('xfer_resp'); - $self->write($res->to_string_ref); - $self->write(sub { $self->http_response_sent; }); - return; - } - - # state update - $self->state('wait_open'); - - Perlbal::AIO::aio_open($file, 0, 0, sub { - my $rp_fh = shift; - - # if client's gone, just close filehandle and abort - if ($self->{closed}) { - CORE::close($rp_fh) if $rp_fh; - return; - } - - # handle errors - if (! $rp_fh) { - # couldn't open the file we had already successfully stat'ed. - # FIXME: do 500 vs. 404 vs whatever based on $! - return $self->close('aio_open_failure'); - } - - $self->state('xfer_disk'); - $self->tcp_cork(1); # cork writes to self - $self->write($res->to_string_ref); - - # seek if partial content - if ($status == 206) { - sysseek($rp_fh, $range_start, &POSIX::SEEK_SET); - $size = $range_end - $range_start + 1; - } - - $self->{reproxy_file} = $file; - $self->reproxy_fh($rp_fh, $size); - }); - - } elsif (-d _) { - $self->try_index_files($hd, $res, $uri); - } - }); -} - -sub _serve_request_multiple { - my Perlbal::ClientHTTPBase $self = shift; - my ($hd, $uri) = @_; - - my @multiple_files; - my %statinfo; # file -> [ stat fields ] - - # double question mark means to serve multiple files, comma - # separated after the questions. the uri part before the question - # mark is the relative base directory - my ($base, $list) = ($uri =~ /(.+)\?\?(.+)/); - - unless ($base =~ m!/$!) 
{ - return $self->_simple_response(500, "Base directory (before ??) must end in slash.") - } - - # and remove any trailing ?.+ on the list, so you can do things like cache busting - # with a ?v= at the end of a list of files. - $list =~ s/\?.+//; - - my Perlbal::Service $svc = $self->{service}; - return $self->_simple_response(500, "Docroot unconfigured") - unless $svc->{docroot}; - - @multiple_files = split(/,/, $list); - - return $self->_simple_response(403, "Multiple file serving isn't enabled") unless $svc->{enable_concatenate_get}; - return $self->_simple_response(403, "Too many files requested") if @multiple_files > 100; - return $self->_simple_response(403, "Bogus filenames") if grep { m!(?:\A|/)\.\./! } @multiple_files; - - my $remain = @multiple_files + 1; # 1 for the base directory - my $dirbase = $svc->{docroot} . $base; - foreach my $file ('', @multiple_files) { - Perlbal::AIO::aio_stat("$dirbase$file", sub { - $remain--; - $statinfo{$file} = $! ? [] : [ stat(_) ]; - return if $remain || $self->{closed}; - $self->_serve_request_multiple_poststat($hd, $dirbase, \@multiple_files, \%statinfo); - }); - } -} - -sub _serve_request_multiple_poststat { - my Perlbal::ClientHTTPBase $self = shift; - my ($hd, $basedir, $filelist, $stats) = @_; - - # base directory must be a directory - unless (S_ISDIR($stats->{''}[2] || 0)) { - return $self->_simple_response(404, "Base directory not a directory"); - } - - # files must all exist - my $sum_length = 0; - my $most_recent_mod = 0; - my $mime; # undef until set, or defaults to text/plain later - foreach my $f (@$filelist) { - my $stat = $stats->{$f}; - unless (S_ISREG($stat->[2] || 0)) { - return if $self->{service}->run_hook('concat_get_poststat_file_missing', $self); - return $self->_simple_response(404, "One or more file does not exist"); - } - if (!$mime && $f =~ /\.(\w+)$/ && $MimeType->{$1}) { - $mime = $MimeType->{$1}; - } - $sum_length += $stat->[7]; - $most_recent_mod = $stat->[9] if - $stat->[9] 
>$most_recent_mod; - } - $mime ||= 'text/plain'; - - my $lastmod = HTTP::Date::time2str($most_recent_mod); - my $ims = $hd->header("If-Modified-Since") || ""; - - # IE sends a request header like "If-Modified-Since: ; length=" - # so we have to remove the length bit before comparing it with our date. - # then we save the length to compare later. - my $ims_len; - if ($ims && $ims =~ s/; length=(\d+)//) { - $ims_len = $1; - } - - # What is -f _ doing here? don't we detect the existence of all files above in the loop? - my $not_mod = $ims eq $lastmod && -f _; - - my $res; - if ($not_mod) { - $res = $self->{res_headers} = Perlbal::HTTPHeaders->new_response(304); - } else { - return if $self->{service}->run_hook('concat_get_poststat_pre_send', $self, $most_recent_mod); - $res = $self->{res_headers} = Perlbal::HTTPHeaders->new_response(200); - $res->header("Content-Length", $sum_length); - } - - $res->header("Date", HTTP::Date::time2str()); - $res->header("Server", "Perlbal"); - $res->header("Last-Modified", $lastmod); - $res->header("Content-Type", $mime); - # has to happen after content-length is set to work: - $self->setup_keepalive($res); - return if $self->{service}->run_hook('modify_response_headers', $self); - - if ($hd->request_method eq "HEAD" || $not_mod) { - # we can return already, since we know the size - $self->tcp_cork(1); - $self->state('xfer_resp'); - $self->write($res->to_string_ref); - $self->write(sub { $self->http_response_sent; }); - return; - } - - $self->tcp_cork(1); # cork writes to self - $self->write($res->to_string_ref); - $self->state('wait_open'); - - # gotta send all files, one by one... 
- my @remain = @$filelist; - $self->{post_sendfile_cb} = sub { - unless (@remain) { - $self->write(sub { $self->http_response_sent; }); - return; - } - - my $file = shift @remain; - my $fullfile = "$basedir$file"; - my $size = $stats->{$file}[7]; - - Perlbal::AIO::aio_open($fullfile, 0, 0, sub { - my $rp_fh = shift; - - # if client's gone, just close filehandle and abort - if ($self->{closed}) { - CORE::close($rp_fh) if $rp_fh; - return; - } - - # handle errors - if (! $rp_fh) { - # couldn't open the file we had already successfully stat'ed. - # FIXME: do 500 vs. 404 vs whatever based on $! - return $self->close('aio_open_failure'); - } - - $self->{reproxy_file} = $file; - $self->reproxy_fh($rp_fh, $size); - }); - }; - $self->{post_sendfile_cb}->(); -} - -sub check_req_headers { - my Perlbal::ClientHTTPBase $self = shift; - my Perlbal::HTTPHeaders $hds = $self->{req_headers}; - - if ($self->{service}->trusted_ip($self->peer_ip_string)) { - my @ips = split /,\s*/, ($hds->header("X-Forwarded-For") || ''); - - # This list may be empty, and that's OK, in that case we should unset the - # observed_ip_string, so no matter what we'll use the 0th element, whether - # it happens to be an ip string, or undef. - $self->observed_ip_string($ips[0]); - } - - return; -} - -sub try_index_files { - my Perlbal::ClientHTTPBase $self = shift; - my ($hd, $res, $uri, $filepos) = @_; - - # make sure this starts at 0 initially, and fail if it's past the end - $filepos ||= 0; - if ($filepos >= scalar(@{$self->{service}->{index_files} || []})) { - unless ($self->{service}->{dirindexing}) { - # just inform them that listing is disabled - $self->_simple_response(200, "Directory listing disabled"); - return; - } - - # ensure uri has one and only one trailing slash for better URLs - $uri =~ s!/*$!/!; - - # open the directory and create an index - my $body = ""; - my $file = $self->{service}->{docroot} . 
$uri; - - $res->header("Content-Type", "text/html"); - opendir(D, $file); - foreach my $de (sort readdir(D)) { - if (-d "$file/$de") { - $body .= "$de
\n"; - } else { - $body .= "$de
\n"; - } - } - closedir(D); - - $body .= ""; - $res->header("Content-Length", length($body)); - $self->setup_keepalive($res); - - $self->state('xfer_resp'); - $self->tcp_cork(1); # cork writes to self - $self->write($res->to_string_ref); - $self->write(\$body); - $self->write(sub { $self->http_response_sent; }); - return; - } - - # construct the file path we need to check - my $file = $self->{service}->{index_files}->[$filepos]; - my $fullpath = $self->{service}->{docroot} . $uri . '/' . $file; - - # now see if it exists - Perlbal::AIO::aio_stat($fullpath, sub { - return if $self->{closed}; - return $self->try_index_files($hd, $res, $uri, $filepos + 1) unless -f _; - - # at this point the file exists, so we just want to serve it - $self->{replacement_uri} = $uri . '/' . $file; - return $self->_serve_request($hd); - }); -} - -sub _simple_response { - my Perlbal::ClientHTTPBase $self = shift; - my ($code, $msg) = @_; # or bodyref - - my $res = $self->{res_headers} = Perlbal::HTTPHeaders->new_response($code); - - my $body; - if ($code != 204 && $code != 304) { - $res->header("Content-Type", "text/html"); - my $en = $res->http_code_english; - $body = "

$code" . ($en ? " - $en" : "") . "

\n"; - $body .= $msg if $msg; - $res->header('Content-Length', length($body)); - } - - $res->header('Server', 'Perlbal'); - - $self->setup_keepalive($res); - - $self->state('xfer_resp'); - $self->tcp_cork(1); # cork writes to self - $self->write($res->to_string_ref); - if (defined $body) { - unless ($self->{req_headers} && $self->{req_headers}->request_method eq 'HEAD') { - # don't write body for head requests - $self->write(\$body); - } - } - $self->write(sub { $self->http_response_sent; }); - return 1; -} - - -sub send_response { - my Perlbal::ClientHTTPBase $self = shift; - - $self->watch_read(0); - $self->watch_write(1); - return $self->_simple_response(@_); -} - -# method that sends a 500 to the user but logs it and any extra information -# we have about the error in question -sub system_error { - my Perlbal::ClientHTTPBase $self = shift; - my ($msg, $info) = @_; - - # log to syslog - Perlbal::log('warning', "system error: $msg ($info)"); - - # and return a 500 - return $self->send_response(500, $msg); -} - -sub event_err { my $self = shift; $self->close('error'); } -sub event_hup { my $self = shift; $self->close('hup'); } - -sub _sock_port { - my $name = $_[0]; - my $port = eval { (Socket::sockaddr_in($name))[0] }; - return $port unless $@; - # fallback to IPv6: - return (Socket6::unpack_sockaddr_in($name))[0]; -} - -sub as_string { - my Perlbal::ClientHTTPBase $self = shift; - - my $ret = $self->SUPER::as_string; - my $name = $self->{sock} ? getsockname($self->{sock}) : undef; - my $lport = $name ? _sock_port($name) : undef; - my $observed = $self->observed_ip_string; - $ret .= ": localport=$lport" if $lport; - $ret .= "; observed_ip=$observed" if defined $observed; - $ret .= "; reqs=$self->{requests}"; - $ret .= "; $self->{state}"; - - my $hd = $self->{req_headers}; - if (defined $hd) { - my $host = $hd->header('Host') || 'unknown'; - $ret .= "; http://$host" . 
$hd->request_uri; - } - - return $ret; -} - -1; - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/ClientManage.pm b/lib/mogdeps/Perlbal/ClientManage.pm deleted file mode 100644 index 5eee3c72..00000000 --- a/lib/mogdeps/Perlbal/ClientManage.pm +++ /dev/null @@ -1,150 +0,0 @@ -###################################################################### -# Management connection from a client -###################################################################### - -package Perlbal::ClientManage; -use strict; -use warnings; -no warnings qw(deprecated); - -use base "Perlbal::Socket"; -use fields ('service', - 'buf', - 'is_http', # bool: is an HTTP request? - 'ctx', # command context - ); - -# ClientManage -sub new { - my Perlbal::ClientManage $self = shift; - my ($service, $sock) = @_; - $self = fields::new($self) unless ref $self; - $self->SUPER::new($sock); - $self->{service} = $service; - $self->{buf} = ""; # what we've read so far, not forming a complete line - - $self->{ctx} = Perlbal::CommandContext->new; - $self->{ctx}->verbose(1); - - $self->watch_read(1); - return $self; -} - -# ClientManage -sub event_read { - my Perlbal::ClientManage $self = shift; - - my $bref; - unless ($self->{is_http}) { - $bref = $self->read(1024); - return $self->close() unless defined $bref; - $self->{buf} .= $$bref; - - if ($self->{buf} =~ /^(?:HEAD|GET|POST) /) { - $self->{is_http} = 1; - $self->{headers_string} .= $$bref; - } - } - - if ($self->{is_http}) { - my $hd = $self->read_request_headers; - return unless $hd; - $self->handle_http(); - return; - } - - while ($self->{buf} =~ s/^(.+?)\r?\n//) { - my $line = $1; - - if ($line =~ /^quit|exit$/) { - $self->close('user_requested_quit'); - return; - } - - my $out = sub { - $self->write("$_[0]\r\n"); - }; - - Perlbal::run_manage_command($line, $out, $self->{ctx}); - } -} - -sub event_write { - my $self = shift; - $self->watch_write(0) if $self->write(undef); -} - -# 
ClientManage -sub event_err { my $self = shift; $self->close; } -sub event_hup { my $self = shift; $self->close; } - -# HTTP management support -sub handle_http { - my Perlbal::ClientManage $self = shift; - - my $uri = $self->{req_headers}->request_uri; - - my $body; - my $code = "200 OK"; - - my $prebox = sub { - my $cmd = shift; - my $alt = shift; - $body .= "
$cmd
"; - Perlbal::run_manage_command($cmd, sub { - my $line = $_[0] || ""; - $alt->(\$line) if $alt; - $body .= "$line\n"; - }); - $body .= "
\n"; - - }; - - $body .= "\n"; - $body .= "\n"; - $body .= "perlbal management interface"; - - if ($uri eq "/") { - $body .= "

perlbal management interface

    "; - $body .= "
  • Sockets
  • "; - $body .= "
  • Perl Objects in use
  • "; - $body .= "
  • Service Details
      "; - foreach my $sname (Perlbal->service_names) { - my Perlbal::Service $svc = Perlbal->service($sname); - next unless $svc; - my $listen = $svc->{listen} ? " ($svc->{listen})" : ""; - $body .= "
    • $sname - $svc->{role}$listen
    • \n"; - } - $body .= "
  • "; - $body .= "
"; - } elsif ($uri eq "/socks") { - $prebox->('socks summary'); - - $prebox->('socks', sub { - ${$_[0]} =~ s!service \'(\w+)\'!$1!; - }); - } elsif ($uri eq "/obj") { - $prebox->('obj'); - } elsif ($uri =~ m!^/service\?(\w+)$!) { - my $service = $1; - $prebox->("show service $service"); - } else { - $code = "404 Not found"; - $body .= "

$code

"; - } - - $body .= "

Perlbal management.

\n"; - $self->write("HTTP/1.0 $code\r\nContent-type: text/html\r\nContent-Length: " . length($body) . - "\r\n\r\n$body"); - $self->write(sub { $self->close; }); - return; -} - -1; - - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/ClientProxy.pm b/lib/mogdeps/Perlbal/ClientProxy.pm deleted file mode 100644 index 37cec088..00000000 --- a/lib/mogdeps/Perlbal/ClientProxy.pm +++ /dev/null @@ -1,1280 +0,0 @@ -###################################################################### -# HTTP Connection from a reverse proxy client -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. -# -package Perlbal::ClientProxy; -use strict; -use warnings; -use base "Perlbal::ClientHTTPBase"; -no warnings qw(deprecated); - -use Perlbal::ChunkedUploadState; -use Perlbal::Util; - -use fields ( - 'backend', # Perlbal::BackendHTTP object (or undef if disconnected) - 'backend_requested', # true if we've requested a backend for this request - 'reconnect_count', # number of times we've tried to reconnect to backend - 'high_priority', # boolean; 1 if we are or were in the high priority queue - 'low_priority', # boolean; 1 if we are or were in the low priority queue - 'reproxy_uris', # arrayref; URIs to reproxy to, in order - 'reproxy_expected_size', # int: size of response we expect to get back for reproxy - 'currently_reproxying', # arrayref; the host info and URI we're reproxying right now - 'content_length_remain', # int: amount of data we're still waiting for - 'responded', # bool: whether we've already sent a response to the user or not - 'last_request_time', # int: time that we last received a request - 'primary_res_hdrs', # if defined, we are doing a transparent reproxy-URI - # and the headers we get back aren't necessarily - # the ones we want. instead, get most headers - # from the provided res headers object here. 
- 'is_buffering', # bool; if we're buffering some/all of a request to memory/disk - 'is_writing', # bool; if on, we currently have an aio_write out - 'start_time', # hi-res time when we started getting data to upload - 'bufh', # buffered upload filehandle object - 'bufilename', # string; buffered upload filename - 'bureason', # string; if defined, the reason we're buffering to disk - 'buoutpos', # int; buffered output position - 'backend_stalled', # boolean: if backend has shut off its reads because we're too slow. - 'unread_data_waiting', # boolean: if we shut off reads while we know data is yet to be read from client - 'chunked_upload_state', # bool/obj: if processing a chunked upload, Perlbal::ChunkedUploadState object, else undef - 'request_body_length', # integer: request's body length, either as-declared, - # or calculated after chunked upload is complete - - # for perlbal sending out UDP packets related to upload status (for xmlhttprequest upload bar) - 'last_upload_packet', # unixtime we last sent a UDP upload packet - 'upload_session', # client's self-generated upload session - - # error-retrying stuff - 'retry_count', # number of times we've retried this request so far after getting 500 errors - ); - -use constant READ_SIZE => 131072; # 128k, ~common TCP window size? -use constant READ_AHEAD_SIZE => 32768; # kinda arbitrary. sum of these two is max stored per connection while waiting for backend. 
-use Errno qw( EPIPE ENOENT ECONNRESET EAGAIN ); -use POSIX qw( O_CREAT O_TRUNC O_RDWR O_RDONLY ); -use Time::HiRes qw( gettimeofday tv_interval ); - -my $udp_sock; - -# ClientProxy -sub new { - my Perlbal::ClientProxy $self = shift; - my ($service, $sock) = @_; - $self = fields::new($self) unless ref $self; - $self->SUPER::new($service, $sock ); - - Perlbal::objctor($self); - - $self->init; - $self->watch_read(1); - return $self; -} - -sub new_from_base { - my $class = shift; - my Perlbal::ClientHTTPBase $cb = shift; - Perlbal::Util::rebless($cb, $class); - $cb->init; - $cb->watch_read(1); - $cb->handle_request; - return $cb; -} - -sub init { - my Perlbal::ClientProxy $self = $_[0]; - - $self->{last_request_time} = 0; - - $self->{backend} = undef; - $self->{high_priority} = 0; - $self->{low_priority} = 0; - - $self->{responded} = 0; - $self->{unread_data_waiting} = 0; - $self->{content_length_remain} = undef; - $self->{backend_requested} = 0; - - $self->{is_buffering} = 0; - $self->{is_writing} = 0; - $self->{start_time} = undef; - $self->{bufh} = undef; - $self->{bufilename} = undef; - $self->{buoutpos} = 0; - $self->{bureason} = undef; - $self->{chunked_upload_state} = undef; - $self->{request_body_length} = undef; - - $self->{reproxy_uris} = undef; - $self->{reproxy_expected_size} = undef; - $self->{currently_reproxying} = undef; - - $self->{retry_count} = 0; -} - -# given a service name, re-request (GET/HEAD only) to that service, even though -# you've already done a request to your original service -sub start_reproxy_service { - my Perlbal::ClientProxy $self = $_[0]; - my Perlbal::HTTPHeaders $primary_res_hdrs = $_[1]; - my $svc_name = $_[2]; - - my $svc = $svc_name ? 
Perlbal->service($svc_name) : undef; - unless ($svc) { - $self->_simple_response(404, "Vhost twiddling not configured for requested pair."); - return 1; - } - - $self->{backend_requested} = 0; - $self->{backend} = undef; - $self->{res_headers} = $primary_res_hdrs; - - $svc->adopt_base_client($self); -} - -# call this with a string of space separated URIs to start a process -# that will fetch the item at the first and return it to the user, -# on failure it will try the second, then third, etc -sub start_reproxy_uri { - my Perlbal::ClientProxy $self = $_[0]; - my Perlbal::HTTPHeaders $primary_res_hdrs = $_[1]; - my $urls = $_[2]; - - # at this point we need to disconnect from our backend - $self->{backend} = undef; - - # failure if we have no primary response headers - return unless $self->{primary_res_hdrs} ||= $primary_res_hdrs; - - # construct reproxy_uri list - if (defined $urls) { - my @uris = split /\s+/, $urls; - $self->{currently_reproxying} = undef; - $self->{reproxy_uris} = []; - foreach my $uri (@uris) { - next unless $uri =~ m!^http://(.+?)(?::(\d+))?(/.*)?$!; - push @{$self->{reproxy_uris}}, [ $1, $2 || 80, $3 || '/' ]; - } - } - - # if we get in here and we have currently_reproxying defined, then something - # happened and we want to retry that one - if ($self->{currently_reproxying}) { - unshift @{$self->{reproxy_uris}}, $self->{currently_reproxying}; - $self->{currently_reproxying} = undef; - } - - # if we have no uris in our list now, tell the user 404 - return $self->_simple_response(503) - unless @{$self->{reproxy_uris} || []}; - - # set the expected size if we got a content length in our headers - if ($primary_res_hdrs && (my $expected_size = $primary_res_hdrs->header('X-REPROXY-EXPECTED-SIZE'))) { - $self->{reproxy_expected_size} = $expected_size; - } - - # pass ourselves off to the reproxy manager - $self->state('wait_backend'); - Perlbal::ReproxyManager::do_reproxy($self); -} - -# called by the reproxy manager when we can't get to our 
requested backend -sub try_next_uri { - my Perlbal::ClientProxy $self = $_[0]; - - shift @{$self->{reproxy_uris}}; - $self->{currently_reproxying} = undef; - $self->start_reproxy_uri(); -} - -# returns true if this ClientProxy is too many bytes behind the backend -sub too_far_behind_backend { - my Perlbal::ClientProxy $self = $_[0]; - my Perlbal::BackendHTTP $backend = $self->{backend} or return 0; - - # if a backend doesn't have a service, it's a - # ReproxyManager-created backend, and thus it should use the - # 'buffer_size_reproxy_url' parameter for acceptable buffer - # widths, and not the regular 'buffer_size'. this lets people - # tune buffers depending on the types of webservers. (assumption - # being that reproxied-to webservers are event-based and it's okay - # to tie the up longer in favor of using less buffer memory in - # perlbal) - my $max_buffer = defined $backend->{service} ? - $self->{service}->{buffer_size} : - $self->{service}->{buffer_size_reproxy_url}; - - return $self->{write_buf_size} > $max_buffer; -} - -# this is a callback for when a backend has been created and is -# ready for us to do something with it -sub use_reproxy_backend { - my Perlbal::ClientProxy $self = $_[0]; - my Perlbal::BackendHTTP $be = $_[1]; - - # get a URI - my $datref = $self->{currently_reproxying} = shift @{$self->{reproxy_uris}}; - unless (defined $datref) { - # return error and close the backend - $be->close('invalid_uris'); - return $self->_simple_response(503); - } - - # now send request - $self->{backend} = $be; - $be->{client} = $self; - - my $extra_hdr = ""; - if (my $range = $self->{req_headers}->header("Range")) { - $extra_hdr .= "Range: $range\r\n"; - } - if (my $host = $self->{req_headers}->header("Host")) { - $extra_hdr .= "Host: $host\r\n"; - } - - my $req_method = $self->{req_headers}->request_method eq 'HEAD' ? 
'HEAD' : 'GET'; - my $headers = "$req_method $datref->[2] HTTP/1.0\r\nConnection: keep-alive\r\n${extra_hdr}\r\n"; - - $be->{req_headers} = Perlbal::HTTPHeaders->new(\$headers); - $be->state('sending_req'); - $self->state('backend_req_sent'); - $be->write($be->{req_headers}->to_string_ref); - $be->watch_read(1); - $be->watch_write(1); -} - -# this is called when a transient backend getting a reproxied URI has received -# a response from the server and is ready for us to deal with it -sub backend_response_received { - my Perlbal::ClientProxy $self = $_[0]; - my Perlbal::BackendHTTP $be = $_[1]; - - # a response means that we are no longer currently waiting on a reproxy, and - # don't want to retry this URI - $self->{currently_reproxying} = undef; - - # we fail if we got something that's NOT a 2xx code, OR, if we expected - # a certain size and got back something different - my $code = $be->{res_headers}->response_code + 0; - - my $bad_code = sub { - return 0 if $code >= 200 && $code <= 299; - return 0 if $code == 416; - return 1; - }; - - my $bad_size = sub { - return 0 unless defined $self->{reproxy_expected_size}; - return $self->{reproxy_expected_size} != $be->{res_headers}->header('Content-length'); - }; - - if ($bad_code->() || $bad_size->()) { - # fall back to an alternate URL - $be->{client} = undef; - $be->close('non_200_reproxy'); - $self->try_next_uri; - return 1; - } - return 0; -} - -sub start_reproxy_file { - my Perlbal::ClientProxy $self = shift; - my $file = shift; # filename to reproxy - my Perlbal::HTTPHeaders $hd = shift; # headers from backend, in need of cleanup - - # at this point we need to disconnect from our backend - $self->{backend} = undef; - - # call hook for pre-reproxy - return if $self->{service}->run_hook("start_file_reproxy", $self, \$file); - - # set our expected size - if (my $expected_size = $hd->header('X-REPROXY-EXPECTED-SIZE')) { - $self->{reproxy_expected_size} = $expected_size; - } - - # start an async stat on the file - 
$self->state('wait_stat'); - Perlbal::AIO::aio_stat($file, sub { - - # if the client's since disconnected by the time we get the stat, - # just bail. - return if $self->{closed}; - - my $size = -s _; - - unless ($size) { - # FIXME: POLICY: 404 or retry request to backend w/o reproxy-file capability? - return $self->_simple_response(404); - } - if (defined $self->{reproxy_expected_size} && $self->{reproxy_expected_size} != $size) { - # 404; the file size doesn't match what we expected - return $self->_simple_response(404); - } - - # if the thing we're reproxying is indeed a file, advertise that - # we support byte ranges on it - if (-f _) { - $hd->header("Accept-Ranges", "bytes"); - } - - my ($status, $range_start, $range_end) = $self->{req_headers}->range($size); - my $not_satisfiable = 0; - - if ($status == 416) { - $hd = Perlbal::HTTPHeaders->new_response(416); - $hd->header("Content-Range", $size ? "bytes */$size" : "*"); - $not_satisfiable = 1; - } - - # change the status code to 200 if the backend gave us 204 No Content - $hd->code(200) if $hd->response_code == 204; - - # fixup the Content-Length header with the correct size (application - # doesn't need to provide a correct value if it doesn't want to stat()) - if ($status == 200) { - $hd->header("Content-Length", $size); - } elsif ($status == 206) { - $hd->header("Content-Range", "bytes $range_start-$range_end/$size"); - $hd->header("Content-Length", $range_end - $range_start + 1); - $hd->code(206); - } - - # don't send this internal header to the client: - $hd->header('X-REPROXY-FILE', undef); - - # rewrite some other parts of the header - $self->setup_keepalive($hd); - - # just send the header, now that we cleaned it. 
- $self->{res_headers} = $hd; - $self->write($hd->to_string_ref); - - if ($self->{req_headers}->request_method eq 'HEAD' || $not_satisfiable) { - $self->write(sub { $self->http_response_sent; }); - return; - } - - $self->state('wait_open'); - Perlbal::AIO::aio_open($file, 0, 0 , sub { - my $fh = shift; - - # if client's gone, just close filehandle and abort - if ($self->{closed}) { - CORE::close($fh) if $fh; - return; - } - - # handle errors - if (! $fh) { - # FIXME: do 500 vs. 404 vs whatever based on $! ? - return $self->_simple_response(500); - } - - # seek if partial content - if ($status == 206) { - sysseek($fh, $range_start, &POSIX::SEEK_SET); - $size = $range_end - $range_start + 1; - } - - $self->reproxy_fh($fh, $size); - $self->watch_write(1); - }); - }); -} - -# Client -# get/set backend proxy connection -sub backend { - my Perlbal::ClientProxy $self = shift; - return $self->{backend} unless @_; - - my $backend = shift; - $self->state('draining_res') unless $backend; - return $self->{backend} = $backend; -} - -# invoked by backend when it wants us to start watching for reads again -# and feeding it data (if we have any) -sub backend_ready { - my Perlbal::ClientProxy $self = $_[0]; - my Perlbal::BackendHTTP $be = $_[1]; - - # if we'd turned ourselves off while we waited for a backend, turn - # ourselves back on, because the backend is ready for data now. - if ($self->{unread_data_waiting}) { - $self->watch_read(1); - } - - # normal, not-buffered-to-disk case: - return $self->drain_read_buf_to($be) unless $self->{bureason}; - - # buffered-to-disk case. - - # tell the backend it has to go into buffered_upload_mode, - # which makes it inform us of its writable availability - $be->invoke_buffered_upload_mode; -} - -# our backend enqueues a call to this method in our write buffer, so this is called -# right after we've finished sending all of the results to the user. at this point, -# if we were doing keep-alive, we don't close and setup for the next request. 
-sub backend_finished { - my Perlbal::ClientProxy $self = shift; - print "ClientProxy::backend_finished\n" if Perlbal::DEBUG >= 3; - - # mark ourselves as having responded (presumably if we're here, - # the backend has responded already) - $self->{responded} = 1; - - # our backend is done with us, so we disconnect ourselves from it - $self->{backend} = undef; - - # backend is done sending data to us, so we can recycle this clientproxy - # if we don't have any data yet to read - return $self->http_response_sent unless $self->{unread_data_waiting}; - - # if we get here (and we do, rarely, in practice) then that means - # the backend read was empty/disconnected (or otherwise messed up), - # and the only thing we can really do is close the client down. - $self->close("backend_finished_while_unread_data"); -} - -# Called when this client is entering a persist_wait state, but before we are returned to base. -sub persist_wait { - my Perlbal::ClientProxy $self = $_[0]; - # We're in keepalive, and just completed a proxy request - $self->{service}->run_hooks('end_proxy_request', $self); -} - -# called when we've sent a response to a user fully and we need to reset state -sub http_response_sent { - my Perlbal::ClientProxy $self = $_[0]; - - # persistence logic is in ClientHTTPBase - return 0 unless $self->SUPER::http_response_sent; - - print "ClientProxy::http_response_sent -- resetting state\n" if Perlbal::DEBUG >= 3; - - if (my $be = $self->{backend}) { - $self->{backend} = undef; - $be->forget_client; - } - - # if we get here we're being persistent, reset our state - $self->{backend_requested} = 0; - $self->{high_priority} = 0; - $self->{reproxy_uris} = undef; - $self->{reproxy_expected_size} = undef; - $self->{currently_reproxying} = undef; - $self->{content_length_remain} = undef; - $self->{primary_res_hdrs} = undef; - $self->{responded} = 0; - $self->{is_buffering} = 0; - $self->{is_writing} = 0; - $self->{start_time} = undef; - $self->{bufh} = undef; - 
$self->{bufilename} = undef; - $self->{buoutpos} = 0; - $self->{bureason} = undef; - $self->{upload_session} = undef; - $self->{chunked_upload_state} = undef; - $self->{request_body_length} = undef; - return 1; -} - -# to request a backend connection AFTER you've already done so, if you -# didn't like the results from the first one. (like after a 500 error) -sub rerequest_backend { - my Perlbal::ClientProxy $self = shift; - - $self->{backend_requested} = 0; - $self->{backend} = undef; - $self->request_backend; -} - -sub request_backend { - my Perlbal::ClientProxy $self = shift; - return if $self->{backend_requested}; - $self->{backend_requested} = 1; - - $self->state('wait_backend'); - $self->{service}->request_backend_connection($self); - $self->tcp_cork(1); # cork writes to self -} - -# Client (overrides and calls super) -sub close { - my Perlbal::ClientProxy $self = shift; - my $reason = shift; - - warn sprintf( - "Perlbal::ClientProxy closed %s%s.\n", - ( $self->{closed} ? "again " : "" ), - (defined $reason ? "saying '$reason'" : "for an unknown reason") - ) if Perlbal::DEBUG >= 2; - - # don't close twice - return if $self->{closed}; - - # signal that we're done - $self->{service}->run_hooks('end_proxy_request', $self); - - # kill our backend if we still have one - if (my $backend = $self->{backend}) { - print "Client ($self) closing backend ($backend)\n" if Perlbal::DEBUG >= 1; - $self->backend(undef); - $backend->close($reason ? 
"proxied_from_client_close:$reason" : "proxied_from_client_close"); - } else { - # if no backend, tell our service that we don't care for one anymore - $self->{service}->note_client_close($self); - } - - # call ClientHTTPBase's close - $self->SUPER::close($reason); -} - -sub client_disconnected { # : void - my Perlbal::ClientProxy $self = shift; - print "ClientProxy::client_disconnected\n" if Perlbal::DEBUG >= 2; - - # if client disconnected, then we need to turn off watching for - # further reads and purge the existing upload if any. also, we - # should just return and do nothing else. - - $self->watch_read(0); - $self->purge_buffered_upload if $self->{bureason}; - return $self->close('user_disconnected'); -} - -# Client -sub event_write { - my Perlbal::ClientProxy $self = shift; - print "ClientProxy::event_write\n" if Perlbal::DEBUG >= 3; - - # obviously if we're writing the backend has processed our request - # and we are responding/have responded to the user, so mark it so - $self->{responded} = 1; - - # will eventually, finally reset the whole object on completion - $self->SUPER::event_write; - - # trigger our backend to keep reading, if it's still connected - if ($self->{backend_stalled} && (my $backend = $self->{backend})) { - print " unstalling backend\n" if Perlbal::DEBUG >= 3; - - $self->{backend_stalled} = 0; - $backend->watch_read(1); - } -} - -# ClientProxy -sub event_read { - my Perlbal::ClientProxy $self = shift; - print "ClientProxy::event_read\n" if Perlbal::DEBUG >= 3; - - # mark alive so we don't get killed for being idle - $self->{alive_time} = time; - - # if we have no headers, the only thing we can do is try to get some - if (! $self->{req_headers}) { - print " no headers. 
reading.\n" if Perlbal::DEBUG >= 3; - $self->handle_request if $self->read_request_headers; - return; - } - - # if we're buffering to disk or haven't read too much from this client, keep reading, - # otherwise shut off read notifications - unless ($self->{is_buffering} || $self->{read_ahead} < READ_AHEAD_SIZE) { - # our buffer is full, so turn off reads for now - print " disabling reads.\n" if Perlbal::DEBUG >= 3; - $self->watch_read(0); - return; - } - - # deal with chunked uploads - if (my $cus = $self->{chunked_upload_state}) { - $cus->on_readable($self); - - # if we got more than 1MB not flushed to disk, - # stop reading for a bit until disk catches up - if ($self->{read_ahead} > 1024*1024) { - $self->watch_read(0); - } - return; - } - - # read more data if we're still buffering or if our current read buffer - # is not full to the max READ_AHEAD_SIZE which is how much data we will - # buffer in from the user before passing on to the backend - - # read the MIN(READ_SIZE, content_length_remain) - my $read_size = READ_SIZE; - my $remain = $self->{content_length_remain}; - - $read_size = $remain if $remain && $remain < $read_size; - print " reading $read_size bytes (", (defined $remain ? $remain : "(undef)"), " bytes remain)\n" if Perlbal::DEBUG >= 3; - - my $bref = $self->read($read_size); - - # if the read returned undef, that means the connection was closed - # (see: Danga::Socket::read) - return $self->client_disconnected unless defined $bref; - - # if they didn't declare a content body length and we just got a - # readable event that's not a disconnect, something's messed up. - # they're overflowing us. disconnect! - if (! $remain) { - $self->_simple_response(400, "Can't pipeline to HTTP/1.0"); - $self->close("pipelining_to_http10"); - return; - } - - # now that we know we have a defined value, determine how long it is, and do - # housekeeping to keep our tracking numbers up to date. 
- my $len = length($$bref); - print " read $len bytes\n" if Perlbal::DEBUG >= 3; - - # when run under the program "trickle", epoll speaks the truth to - # us, but then trickle interferes and steals our reads/writes, so - # this fails. normally this check isn't needed. - return unless $len; - - $self->{read_size} += $len; - $self->{content_length_remain} -= $len if $remain; - - my $done_reading = defined $self->{content_length_remain} && $self->{content_length_remain} <= 0; - my $backend = $self->backend; - print(" done_reading = $done_reading, backend = ", ($backend || ""), "\n") if Perlbal::DEBUG >= 3; - - # upload tracking - if (my $session = $self->{upload_session}) { - my $cl = $self->{req_headers}->content_length; - my $remain = $self->{content_length_remain}; - my $now = time(); # FIXME: more efficient? - if ($cl && $remain && ($self->{last_upload_packet} || 0) != $now) { - my $done = $cl - $remain; - $self->{last_upload_packet} = $now; - $udp_sock ||= IO::Socket::INET->new(Proto => 'udp'); - my $since = $self->{last_request_time}; - my $send = "UPLOAD:$session:$done:$cl:$since:$now"; - if ($udp_sock) { - foreach my $ep (@{ $self->{service}{upload_status_listeners_sockaddr} }) { - my $rv = $udp_sock->send($send, 0, $ep); - } - } - } - } - - # just dump the read into the nether if we're dangling. 
that is - # the case when we send the headers to the backend and it responds - # before we're done reading from the client; therefore further - # reads from the client just need to be sent nowhere, because the - # RFC2616 section 8.2.3 says: "the server SHOULD NOT close the - # transport connection until it has read the entire request" - if ($self->{responded}) { - print " already responded.\n" if Perlbal::DEBUG >= 3; - # in addition, if we're now out of data (clr == 0), then we should - # either close ourselves or get ready for another request - return $self->http_response_sent if $done_reading; - - print " already responded [2].\n" if Perlbal::DEBUG >= 3; - # at this point, if the backend has responded then we just return - # as we don't want to send it on to them or buffer it up, which is - # what the code below does - return; - } - - # if we have no data left to read, stop reading. all that can - # come later is an extra \r\n which we handle later when parsing - # new request headers. and if it's something else, we'll bail on - # the next request, not this one. - if ($done_reading) { - Carp::confess("content_length_remain less than zero: self->{content_length_remain}") - if $self->{content_length_remain} < 0; - $self->{unread_data_waiting} = 0; - $self->watch_read(0); - } - - # now, if we have a backend, then we should be writing it to the backend - # and not doing anything else - if ($backend) { - print " got a backend. sending write to it.\n" if Perlbal::DEBUG >= 3; - $backend->write($bref); - # TODO: monitor the backend's write buffer depth? - return; - } - - # now, we know we don't have a backend, so we have to push this data onto our - # read buffer... it's not going anywhere yet - push @{$self->{read_buf}}, $bref; - $self->{read_ahead} += $len; - print " no backend. read_ahead = $self->{read_ahead}.\n" if Perlbal::DEBUG >= 3; - - # if we know we've already started spooling a file to disk, then continue - # to do that. 
- print " bureason = $self->{bureason}\n" if Perlbal::DEBUG >= 3 && $self->{bureason}; - return $self->buffered_upload_update if $self->{bureason}; - - # if we are under our buffer-to-memory size, just continue buffering here and - # don't fall through to the backend request call below - return if - ! $done_reading && - $self->{read_ahead} < $self->{service}->{buffer_backend_connect}; - - # over the buffer-to-memory size, see if we should start spooling to disk. - return if $self->{service}->{buffer_uploads} && $self->decide_to_buffer_to_disk; - - # give plugins a chance to act on the request before we request a backend - # (added by Chris Hondl , March 2006) - my $svc = $self->{service}; - return if $svc->run_hook('proxy_read_request', $self); - - # if we fall through to here, we need to ensure that a backend is on the - # way, because no specialized handling took over above - print " finally requesting a backend\n" if Perlbal::DEBUG >= 3; - return $self->request_backend; -} - -sub handle_request { - my Perlbal::ClientProxy $self = shift; - my $req_hd = $self->{req_headers}; - - unless ($req_hd) { - $self->close("handle_request without headers"); - return; - } - - $self->check_req_headers; - - my $svc = $self->{service}; - # give plugins a chance to force us to bail - return if $svc->run_hook('start_proxy_request', $self); - return if $svc->run_hook('start_http_request', $self); - - if ($self->handle_chunked_upload) { - # handled in method. - } else { - # if defined we're waiting on some amount of data. also, we have to - # subtract out read_size, which is the amount of data that was - # extra in the packet with the header that's part of the body. 
- my $length = $self->{request_body_length} = - $self->{content_length_remain} = - $req_hd->content_length; - - if (defined $length && $length < 0) { - $self->_simple_response(400, "Invalid request: Content-Length < 0"); - $self->close("negative_content_length"); - return; - } - - $self->{unread_data_waiting} = 1 if $self->{content_length_remain}; - } - - # upload-tracking stuff. both starting a new upload track session, - # and checking on status of ongoing one - return if $svc->{upload_status_listeners} && $self->handle_upload_tracking; - - # note that we've gotten a request - $self->{requests}++; - $self->{last_request_time} = $self->{alive_time}; - - # either start buffering some of the request to memory, or - # immediately request a backend connection. - if ($self->{chunked_upload_state}) { - $self->{request_body_length} = 0; - $self->{is_buffering} = 1; - $self->{bureason} = 'chunked'; - $self->buffered_upload_update; - } elsif ($self->{content_length_remain} && $self->{service}->{buffer_backend_connect}) { - # the deeper path - $self->start_buffering_request; - } else { - # get the backend request process moving, since we aren't buffering - $self->{is_buffering} = 0; - - # if reproxy-caching is enabled, we can often bypass needing to allocate a BackendHTTP connection: - return if $svc->{reproxy_cache} && $self->satisfy_request_from_cache; - - $self->request_backend; - } -} - -sub handle_chunked_upload { - my Perlbal::ClientProxy $self = shift; - my $req_hd = $self->{req_headers}; - my $te = $req_hd->header("Transfer-Encoding"); - return unless $te && $te eq "chunked"; - return unless $self->{service}->{buffer_uploads}; - - $req_hd->header("Transfer-Encoding", undef); # remove it (won't go to backend) - - my $eh = $req_hd->header("Expect"); - if ($eh && $eh =~ /\b100-continue\b/) { - $self->write(\ "HTTP/1.1 100 Continue\r\n\r\n"); - $req_hd->header("Expect", undef); # remove it (won't go to backend) - } - - my $max_size = 
$self->{service}{max_chunked_request_size}; - - my $args = { - on_new_chunk => sub { - my $cref = shift; - my $len = length($$cref); - push @{$self->{read_buf}}, $cref; - $self->{read_ahead} += $len; - $self->{request_body_length} += $len; - - # if too large, disconnect them... - if ($max_size && $self->{request_body_length} > $max_size) { - $self->purge_buffered_upload; - $self->close; - return; - } - $self->buffered_upload_update; - }, - on_disconnect => sub { - $self->client_disconnected; - }, - on_zero_chunk => sub { - $self->send_buffered_upload; - }, - }; - - $self->{chunked_upload_state} = Perlbal::ChunkedUploadState->new(%$args); - return 1; -} - -sub satisfy_request_from_cache { - my Perlbal::ClientProxy $self = shift; - - my $req_hd = $self->{req_headers}; - my $svc = $self->{service}; - my $cache = $svc->{reproxy_cache}; - $svc->{_stat_requests}++; - - my $requri = $req_hd->request_uri || ''; - my $hostname = $req_hd->header("Host") || ''; - - my $key = "$hostname|$requri"; - - my $reproxy = $cache->get($key) or - return 0; - - my ($timeout, $headers, $urls) = @$reproxy; - return 0 if time() > $timeout; - - $svc->{_stat_cache_hits}++; - my %headers = map { ref $_ eq 'SCALAR' ? 
$$_ : $_ } @{$headers || []}; - - if (my $ims = $req_hd->header("If-Modified-Since")) { - my ($lm_key) = grep { uc($_) eq "LAST-MODIFIED" } keys %headers; - my $lm = $headers{$lm_key} || ""; - - # remove the IE length suffix - $ims =~ s/; length=(\d+)//; - - # If 'Last-Modified' is same as 'If-Modified-Since', send a 304 - if ($ims eq $lm) { - my $res_hd = Perlbal::HTTPHeaders->new_response(304); - $res_hd->header("Content-Length", "0"); - $self->tcp_cork(1); - $self->state('xfer_resp'); - $self->write($res_hd->to_string_ref); - $self->write(sub { $self->http_response_sent; }); - return 1; - } - } - - my $res_hd = Perlbal::HTTPHeaders->new_response(200); - $res_hd->header("Date", HTTP::Date::time2str(time())); - while (my ($key, $value) = each %headers) { - $res_hd->header($key, $value); - } - - $self->start_reproxy_uri($res_hd, $urls); - return 1; -} - -# return 1 to steal this connection (when they're asking status of an -# upload session), return 0 to return it to handle_request's control. -sub handle_upload_tracking { - my Perlbal::ClientProxy $self = shift; - my $req_hd = $self->{req_headers}; - - return 0 unless - $req_hd->request_uri =~ /[\?&]client_up_sess=(\w{5,50})\b/; - - my $sess = $1; - - # getting status? - if ($req_hd->request_uri =~ m!^/__upload_status\?!) { - my $status = Perlbal::UploadListener::get_status($sess); - my $now = time(); - my $body = $status ? 
- "{done:$status->{done},total:$status->{total},starttime:$status->{starttime},nowtime:$now}" : - "{}"; - - my $res = $self->{res_headers} = Perlbal::HTTPHeaders->new_response(200); - $res->header("Content-Type", "text/plain"); - $res->header('Content-Length', length $body); - $self->setup_keepalive($res); - $self->tcp_cork(1); # cork writes to self - $self->write($res->to_string_ref); - $self->write(\ $body); - $self->write(sub { $self->http_response_sent; }); - return 1; - } - - # otherwise just tagging this upload as a new upload session - $self->{upload_session} = $sess; - return 0; -} - -# continuation of handle_request, in the case where we need to start buffering -# a bit of the request body to memory, either hoping that's all of it, or to -# make a determination of whether or not we should save it all to disk first -sub start_buffering_request { - my Perlbal::ClientProxy $self = shift; - - # buffering case: - $self->{is_buffering} = 1; - - # shortcut: if we know that we're buffering by size, and the size - # of this upload is bigger than that value, we can just turn on spool - # to disk right now... - if ($self->{service}->{buffer_uploads} && $self->{service}->{buffer_upload_threshold_size}) { - my $req_hd = $self->{req_headers}; - if ($req_hd->content_length >= $self->{service}->{buffer_upload_threshold_size}) { - $self->{bureason} = 'size'; - if ($ENV{PERLBAL_DEBUG_BUFFERED_UPLOADS}) { - $self->{req_headers}->header('X-PERLBAL-BUFFERED-UPLOAD-REASON', 'size'); - } - $self->state('buffering_upload'); - $self->buffered_upload_update; - return; - } - } - - # well, we're buffering, but we're not going to disk just yet (but still might) - $self->state('buffering_request'); - - # only need time if we are using the buffer to disk functionality - $self->{start_time} = [ gettimeofday() ] - if $self->{service}->{buffer_uploads}; -} - -# looks at our states and decides if we should start writing to disk -# or should just go ahead and blast this to the backend. 
returns 1 -# if the decision was made to buffer to disk -sub decide_to_buffer_to_disk { - my Perlbal::ClientProxy $self = shift; - return unless $self->{is_buffering}; - return $self->{bureason} if defined $self->{bureason}; - - # this is called when we have enough data to determine whether or not to - # start buffering to disk - my $dur = tv_interval($self->{start_time}) || 1; - my $rate = $self->{read_ahead} / $dur; - my $etime = $self->{content_length_remain} / $rate; - - # see if we have enough data to make the determination - my $reason = undef; - - # see if we blow the rate away - if ($self->{service}->{buffer_upload_threshold_rate} > 0 && - $rate < $self->{service}->{buffer_upload_threshold_rate}) { - # they are slower than the minimum rate - $reason = 'rate'; - } - - # and finally check estimated time exceeding - if ($self->{service}->{buffer_upload_threshold_time} > 0 && - $etime > $self->{service}->{buffer_upload_threshold_time}) { - # exceeds - $reason = 'time'; - } - - unless ($reason) { - $self->{is_buffering} = 0; - return 0; - } - - # start saving it to disk - $self->state('buffering_upload'); - $self->buffered_upload_update; - $self->{bureason} = $reason; - - if ($ENV{PERLBAL_DEBUG_BUFFERED_UPLOADS}) { - $self->{req_headers}->header('X-PERLBAL-BUFFERED-UPLOAD-REASON', $reason); - } - - return 1; -} - -# take ourselves and send along our buffered data to the backend -sub send_buffered_upload { - my Perlbal::ClientProxy $self = shift; - - # make sure our buoutpos is the same as the content length... - return if $self->{is_writing}; - - # set the content-length that goes to the backend... 
- if ($self->{chunked_upload_state}) { - $self->{req_headers}->header("Content-Length", $self->{request_body_length}); - } - - my $clen = $self->{req_headers}->content_length; - if ($clen != $self->{buoutpos}) { - Perlbal::log('crit', "Content length of $clen declared but $self->{buoutpos} bytes written to disk"); - return $self->_simple_response(500); - } - - # reset our position so we start reading from the right spot - $self->{buoutpos} = 0; - sysseek($self->{bufh}, 0, 0) if ($self->{bufh}); # But only if it exists at all - - # notify that we want the backend so we get the ball rolling - $self->request_backend; -} - -sub continue_buffered_upload { - my Perlbal::ClientProxy $self = shift; - my Perlbal::BackendHTTP $be = shift; - return unless $self && $be; - - # now send the data - my $clen = $self->{request_body_length}; - - if ($self->{buoutpos} < $clen) { - my $sent = Perlbal::Socket::sendfile($be->{fd}, fileno($self->{bufh}), $clen - $self->{buoutpos}); - if ($sent < 0) { - return $self->close("epipe") if $! == EPIPE; - return $self->close("connreset") if $! == ECONNRESET; - print STDERR "Error w/ sendfile: $!\n"; - return $self->close('sendfile_error'); - } - $self->{buoutpos} += $sent; - } - - # if we're done, purge the file and move on - if ($self->{buoutpos} >= $clen) { - $be->{buffered_upload_mode} = 0; - $self->purge_buffered_upload; - return; - } - - # we will be called again by the backend since buffered_upload_mode is on -} - -# write data to disk -sub buffered_upload_update { - my Perlbal::ClientProxy $self = shift; - return if $self->{is_writing}; - return unless $self->{is_buffering} && $self->{read_ahead}; - - # so we're not writing now and we have data to write... - unless ($self->{bufilename}) { - # create a filename and see if it exists or not - $self->{is_writing} = 1; - my $fn = join('-', $self->{service}->name, $self->{service}->listenaddr, "client", $self->{fd}, int(rand(0xffffffff))); - $fn = $self->{service}->{buffer_uploads_path} . 
'/' . $fn; - - # good, now we need to create the file - Perlbal::AIO::aio_open($fn, O_CREAT | O_TRUNC | O_RDWR, 0644, sub { - $self->{is_writing} = 0; - $self->{bufh} = shift; - - # throw errors back to the user - if (! $self->{bufh}) { - Perlbal::log('crit', "Failure to open $fn for buffered upload output"); - return $self->_simple_response(500); - } - - # save state and info and bounce it back to write data - $self->{bufilename} = $fn; - $self->buffered_upload_update; - }); - - return; - } - - # can't proceed if we have no disk file to async write to - # people reported seeing this crash rarely in production... - # must be a race between previously in-flight's write - # re-invoking a write immediately after something triggered - # a buffered upload purge. - unless ($self->{bufh}) { - $self->close; - return; - } - - # at this point, we want to do some writing - my $bref = shift(@{$self->{read_buf}}); - my $len = length $$bref; - $self->{read_ahead} -= $len; - - # so at this point we have a valid filename and file handle and should write out - # the buffer that we have - $self->{is_writing} = 1; - Perlbal::AIO::aio_write($self->{bufh}, $self->{buoutpos}, $len, $$bref, sub { - my $bytes = shift; - $self->{is_writing} = 0; - - # check for error - unless ($bytes > 0) { - Perlbal::log('crit', "Error writing buffered upload: $!. Tried to do $len bytes at $self->{buoutpos}."); - return $self->_simple_response(500); - } - - # update our count of data written - $self->{buoutpos} += $bytes; - - # now check if we wrote less than we had in this chunk of buffer. if that's - # the case then we need to re-enqueue the part of the chunk that wasn't - # written out and update as appropriate. - if ($bytes < $len) { - my $diff = $len - $bytes; - unshift @{$self->{read_buf}}, \ substr($$bref, $bytes, $diff); - $self->{read_ahead} += $diff; - } - - # if we're processing a chunked upload, ... - if ($self->{chunked_upload_state}) { - # turn reads back on, if we haven't hit the end yet. 
- if ($self->{unread_data_waiting} && $self->{read_ahead} < 1024*1024) { - $self->watch_read(1); - $self->{unread_data_waiting} = 0; - } - - if ($self->{read_ahead} == 0 && $self->{chunked_upload_state}->hit_zero_chunk) { - $self->watch_read(0); - $self->send_buffered_upload; - return; - } - } - - # if we're done (no clr and no read ahead!) then send it - elsif ($self->{read_ahead} <= 0 && $self->{content_length_remain} <= 0) { - $self->send_buffered_upload; - return; - } - - # spawn another writer! - $self->buffered_upload_update; - }); -} - -# destroy any files we've created -sub purge_buffered_upload { - my Perlbal::ClientProxy $self = shift; - - # Main reason for failure below is a 0-length chunked upload, where the file is never created. - return unless $self->{bufh}; - - # FIXME: it's reported that sometimes the two now-in-eval blocks - # fail, hence the eval blocks and warnings. the FIXME is to - # figure this out, why it happens sometimes. - - # first close our filehandle... not async - eval { - CORE::close($self->{bufh}); - }; - if ($@) { warn "Error closing file in ClientProxy::purge_buffered_upload: $@\n"; } - - $self->{bufh} = undef; - - eval { - # now asynchronously unlink the file - Perlbal::AIO::aio_unlink($self->{bufilename}, sub { - if ($!) { - # note an error, but whatever, we'll either overwrite the file later (O_TRUNC | O_CREAT) - # or a cleaner will come through and do it for us someday (if the user runs one) - Perlbal::log('warning', "Unable to link $self->{bufilename}: $!"); - } - }); - }; - if ($@) { warn "Error unlinking file in ClientProxy::purge_buffered_upload: $@\n"; } -} - -# returns bool; whether backend should hide the 500 error from the client -# and have us try a new backend. return true to retry, false to get a 500 error. 
-sub should_retry_after_500 { - my Perlbal::ClientProxy $self = shift; - my Perlbal::BackendHTTP $be = shift; - my $svc = $be->{service}; - return 0 unless $svc->{enable_error_retries}; - my @sched = split(/\s*,\s*/, $svc->{error_retry_schedule}); - return 0 if ++$self->{retry_count} > @sched; - return 1; -} - -# called by Backend to tell us it got a 500 error and we should retry another backend. -sub retry_after_500 { - my Perlbal::ClientProxy $self = shift; - my Perlbal::Service $svc = shift; - - my @sched = split(/\s*,\s*/, $svc->{error_retry_schedule}); - my $delay = $sched[$self->{retry_count} - 1]; - - if ($delay) { - Danga::Socket->AddTimer($delay, sub { - return if $self->{closed}; - $self->rerequest_backend; - }); - } else { - $self->rerequest_backend; - } - -} - -sub as_string { - my Perlbal::ClientProxy $self = shift; - - my $ret = $self->SUPER::as_string; - if ($self->{backend}) { - my $ipport = $self->{backend}->{ipport}; - $ret .= "; backend=$ipport"; - } else { - $ret .= "; write_buf_size=$self->{write_buf_size}" - if $self->{write_buf_size} > 0; - } - $ret .= "; highpri" if $self->{high_priority}; - $ret .= "; lowpri" if $self->{low_priority}; - $ret .= "; responded" if $self->{responded}; - $ret .= "; waiting_for=" . 
$self->{content_length_remain} - if defined $self->{content_length_remain}; - $ret .= "; reproxying" if $self->{currently_reproxying}; - - return $ret; -} - -sub set_queue_low { - my Perlbal::ClientProxy $self = shift; - $self->{low_priority} = 1; - return; -} - -sub set_queue_high { - my Perlbal::ClientProxy $self = shift; - $self->{high_priority} = 1; - return; -} - - -sub DESTROY { - Perlbal::objdtor($_[0]); - $_[0]->SUPER::DESTROY; -} - -1; - - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/CommandContext.pm b/lib/mogdeps/Perlbal/CommandContext.pm deleted file mode 100644 index 345d9617..00000000 --- a/lib/mogdeps/Perlbal/CommandContext.pm +++ /dev/null @@ -1,35 +0,0 @@ -# keep track of the surrounding context for a ManageCommand, so commands -# can be less verbose when in config files -# -# Copyright 2005-2007, Six Apart, Ltd. -# - -package Perlbal::CommandContext; -use strict; -use warnings; -no warnings qw(deprecated); - -use fields ( - 'last_created', # the name of the last pool or service created - 'verbose', # scalar bool: verbosity ("OK" on success) - ); - -sub new { - my $class = shift; - my $self = fields::new($class); - return $self; -} - -sub verbose { - my $self = shift; - $self->{verbose} = shift if @_; - $self->{verbose}; -} - -1; - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/HTTPHeaders.pm b/lib/mogdeps/Perlbal/HTTPHeaders.pm deleted file mode 100644 index 8ceb3eae..00000000 --- a/lib/mogdeps/Perlbal/HTTPHeaders.pm +++ /dev/null @@ -1,483 +0,0 @@ -###################################################################### -# HTTP header class (both request and response) -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. 
-# - -package Perlbal::HTTPHeaders; -use strict; -use warnings; -no warnings qw(deprecated); - -use Perlbal; - -use fields ( - 'headers', # href; lowercase header -> comma-sep list of values - 'origcase', # href; lowercase header -> provided case - 'hdorder', # aref; order headers were received (canonical order) - 'method', # scalar; request method (if GET request) - 'uri', # scalar; request URI (if GET request) - 'type', # 'res' or 'req' - 'code', # HTTP response status code - 'codetext', # status text that for response code - 'ver', # version (string) "1.1" - 'vernum', # version (number: major*1000+minor): "1.1" => 1001 - 'responseLine', # first line of HTTP response (if response) - 'requestLine', # first line of HTTP request (if request) - ); - -our $HTTPCode = { - 200 => 'OK', - 204 => 'No Content', - 206 => 'Partial Content', - 301 => 'Permanent Redirect', - 302 => 'Found', - 304 => 'Not Modified', - 400 => 'Bad request', - 403 => 'Forbidden', - 404 => 'Not Found', - 416 => 'Request range not satisfiable', - 500 => 'Internal Server Error', - 501 => 'Not Implemented', - 503 => 'Service Unavailable', -}; - -sub fail { - return undef unless Perlbal::DEBUG >= 1; - - my $reason = shift; - print "HTTP parse failure: $reason\n" if Perlbal::DEBUG >= 1; - return undef; -} - -sub http_code_english { - my Perlbal::HTTPHeaders $self = shift; - if (@_) { - return $HTTPCode->{shift()} || ""; - } else { - return "" unless $self->response_code; - return $HTTPCode->{$self->response_code} || ""; - } -} - -sub new_response { - my Perlbal::HTTPHeaders $self = shift; - $self = fields::new($self) unless ref $self; - - my $code = shift; - $self->{headers} = {}; - $self->{origcase} = {}; - $self->{hdorder} = []; - $self->{method} = undef; - $self->{uri} = undef; - - $self->{responseLine} = "HTTP/1.0 $code " . 
$self->http_code_english($code); - $self->{code} = $code; - $self->{type} = "httpres"; - - Perlbal::objctor($self, $self->{type}); - return $self; -} -*new_response_PERL = \&new_response; - -sub new { - my Perlbal::HTTPHeaders $self = shift; - $self = fields::new($self) unless ref $self; - - my ($hstr_ref, $is_response) = @_; - # hstr: headers as a string ref - # is_response: bool; is HTTP response (as opposed to request). defaults to request. - - my $absoluteURIHost = undef; - - my @lines = split(/\r?\n/, $$hstr_ref); - - $self->{headers} = {}; - $self->{origcase} = {}; - $self->{hdorder} = []; - $self->{method} = undef; - $self->{uri} = undef; - $self->{type} = ($is_response ? "res" : "req"); - Perlbal::objctor($self, $self->{type}); - - # check request line - if ($is_response) { - $self->{responseLine} = (shift @lines) || ""; - - # check for valid response line - return fail("Bogus response line") unless - $self->{responseLine} =~ m!^HTTP\/(\d+)\.(\d+)\s+(\d+)\s+(.*)$!; - - my ($ver_ma, $ver_mi, $code) = ($1, $2, $3); - $self->code($code, $4); - - # version work so we know what version the backend spoke - unless (defined $ver_ma) { - ($ver_ma, $ver_mi) = (0, 9); - } - $self->{ver} = "$ver_ma.$ver_mi"; - $self->{vernum} = $ver_ma*1000 + $ver_mi; - } else { - $self->{requestLine} = (shift @lines) || ""; - - # check for valid request line - return fail("Bogus request line") unless - $self->{requestLine} =~ m!^(\w+) ((?:\*|(?:\S*?)))(?: HTTP/(\d+)\.(\d+))$!; - - $self->{method} = $1; - $self->{uri} = $2; - - my ($ver_ma, $ver_mi) = ($3, $4); - - # now check uri for not being a uri - if ($self->{uri} =~ m!^http://([^/:]+?)(?::\d+)?(/.*)?$!) 
{ - $absoluteURIHost = lc($1); - $self->{uri} = $2 || "/"; # "http://www.foo.com" yields no path, so default to "/" - } - - # default to HTTP/0.9 - unless (defined $ver_ma) { - ($ver_ma, $ver_mi) = (0, 9); - } - - $self->{ver} = "$ver_ma.$ver_mi"; - $self->{vernum} = $ver_ma*1000 + $ver_mi; - } - - my $last_header = undef; - foreach my $line (@lines) { - if ($line =~ /^\s/) { - next unless defined $last_header; - $self->{headers}{$last_header} .= $line; - } elsif ($line =~ /^([^\x00-\x20\x7f()<>@,;:\\\"\/\[\]?={}]+):\s*(.*)$/) { - # RFC 2616: - # sec 4.2: - # message-header = field-name ":" [ field-value ] - # field-name = token - # sec 2.2: - # token = 1* - - $last_header = lc($1); - if (defined $self->{headers}{$last_header}) { - if ($last_header eq "set-cookie") { - # cookie spec doesn't allow merged headers for set-cookie, - # so instead we do this hack so to_string below does the right - # thing without needing to be arrayref-aware or such. also - # this lets client code still modify/delete this data - # (but retrieving the value of "set-cookie" will be broken) - $self->{headers}{$last_header} .= "\r\nSet-Cookie: $2"; - } else { - # normal merged header case (according to spec) - $self->{headers}{$last_header} .= ", $2"; - } - } else { - $self->{headers}{$last_header} = $2; - $self->{origcase}{$last_header} = $1; - push @{$self->{hdorder}}, $last_header; - } - } else { - return fail("unknown header line"); - } - } - - # override the host header if an absolute URI was provided - $self->header('Host', $absoluteURIHost) - if defined $absoluteURIHost; - - # now error if no host - return fail("HTTP 1.1 requires host header") - if !$is_response && $self->{vernum} >= 1001 && !$self->header('Host'); - - return $self; -} -*new_PERL = \&new; - -sub _codetext { - my Perlbal::HTTPHeaders $self = shift; - return $self->{codetext} if $self->{codetext}; - return $self->http_code_english; -} - -sub code { - my Perlbal::HTTPHeaders $self = shift; - my ($code, $text) = @_; - 
$self->{codetext} = $text; - if (! defined $self->{code} || $code != $self->{code}) { - $self->{code} = $code+0; - if ($self->{responseLine}) { - $self->{responseLine} = "HTTP/1.0 $code " . $self->http_code_english; - } - } -} - -sub response_code { - my Perlbal::HTTPHeaders $self = $_[0]; - return $self->{code}; -} - -sub request_method { - my Perlbal::HTTPHeaders $self = shift; - return $self->{method}; -} - -sub request_uri { - my Perlbal::HTTPHeaders $self = shift; - return $self->{uri}; -} - -sub set_request_uri { - my Perlbal::HTTPHeaders $self = shift; - return unless $self->{requestLine}; - - my $uri = shift; - - return unless defined $uri and length $uri; - - my $ver = $self->{ver}; - - if ($ver == 0.9) { - $self->{requestLine} = sprintf("%s %s", $self->{method}, $uri); - } else { - $self->{requestLine} = sprintf("%s %s HTTP/%s", $self->{method}, $uri, $ver); - } - - return $self->{uri} = $uri; -} - -sub version_number { - my Perlbal::HTTPHeaders $self = $_[0]; - return $self->{vernum} unless $_[1]; - return $self->{vernum} = $_[1]; -} - -sub header { - my Perlbal::HTTPHeaders $self = shift; - my $key = shift; - return $self->{headers}{lc($key)} unless @_; - - # adding a new header - my $origcase = $key; - $key = lc($key); - unless (exists $self->{headers}{$key}) { - push @{$self->{hdorder}}, $key; - $self->{origcase}{$key} = $origcase; - } - - return $self->{headers}{$key} = shift; -} - -sub headers_list { - my Perlbal::HTTPHeaders $self = shift; - return [$self->{headers} ? 
keys %{ $self->{headers} } : ()]; -} - -sub to_string_ref { - my Perlbal::HTTPHeaders $self = shift; - my $st = join("\r\n", - $self->{requestLine} || $self->{responseLine}, - (map { "$self->{origcase}{$_}: $self->{headers}{$_}" } - grep { defined $self->{headers}{$_} } - @{$self->{hdorder}}), - '', ''); # final \r\n\r\n - return \$st; -} - -sub clone { - my Perlbal::HTTPHeaders $self = shift; - my $new = fields::new($self); - foreach (qw(method uri type code codetext ver vernum responseLine requestLine)) { - $new->{$_} = $self->{$_}; - } - - # mark this object as constructed - Perlbal::objctor($new, $new->{type}); - - $new->{headers} = { %{$self->{headers}} }; - $new->{origcase} = { %{$self->{origcase}} }; - $new->{hdorder} = [ @{$self->{hdorder}} ]; - return $new; -} - -sub set_version { - my Perlbal::HTTPHeaders $self = shift; - my $ver = shift; - - die "Bogus version" unless $ver =~ /^(\d+)\.(\d+)$/; - my ($ver_ma, $ver_mi) = ($1, $2); - - # check for req, as the other can be res or httpres - if ($self->{type} eq 'req') { - $self->{requestLine} = "$self->{method} $self->{uri} HTTP/$ver"; - } else { - $self->{responseLine} = "HTTP/$ver $self->{code} " . $self->_codetext; - } - $self->{ver} = "$ver_ma.$ver_mi"; - $self->{vernum} = $ver_ma*1000 + $ver_mi; - return $self; -} - -# using all available information, attempt to determine the content length of -# the message body being sent to us. -sub content_length { - my Perlbal::HTTPHeaders $self = shift; - - # shortcuts depending on our method/code, depending on what we are - if ($self->{type} eq 'req') { - # no content length for head requests - return 0 if $self->{method} eq 'HEAD'; - } elsif ($self->{type} eq 'res' || $self->{type} eq 'httpres') { - # no content length in any of these - if ($self->{code} == 304 || $self->{code} == 204 || - ($self->{code} >= 100 && $self->{code} <= 199)) { - return 0; - } - } - - # the normal case for a GET/POST, etc. 
real data coming back - # also, an OPTIONS requests generally has a defined but 0 content-length - if (defined(my $clen = $self->header("Content-Length"))) { - return $clen; - } - - # if we get here, nothing matched, so we don't definitively know what the - # content length is. this is usually an error, but we try to work around it. - return undef; -} - -# answers the question: "should a response to this person specify keep-alive, -# given the request (self) and the backend response?" this is used in proxy -# mode to determine based on the client's request and the backend's response -# whether or not the response from the proxy (us) should do keep-alive. -# -# FIXME: this is called too often (especially with service selector), -# and should be redesigned to be simpler, and/or cached on the -# connection. there's too much duplication with res_keep_alive. -sub req_keep_alive { - my Perlbal::HTTPHeaders $self = $_[0]; - my Perlbal::HTTPHeaders $res = $_[1] or Carp::confess("ASSERT: No response headers given"); - - # get the connection header now (saves warnings later) - my $conn = lc ($self->header('Connection') || ''); - - # check the client - if ($self->version_number < 1001) { - # they must specify a keep-alive header - return 0 unless $conn =~ /\bkeep-alive\b/i; - } - - # so it must be 1.1 which means keep-alive is on, unless they say not to - return 0 if $conn =~ /\bclose\b/i; - - # if we get here, the user wants keep-alive and seems to support it, - # so we make sure that the response is in a form that we can understand - # well enough to do keep-alive. FIXME: support chunked encoding in the - # future, which means this check changes. - return 1 if defined $res->header('Content-length') || - $res->response_code == 304 || # not modified - $res->response_code == 204 || # no content - $self->request_method eq 'HEAD'; - - # fail-safe, no keep-alive - return 0; -} - -# if an options response from a backend looks like it can do keep-alive. 
-sub res_keep_alive_options { - my Perlbal::HTTPHeaders $self = $_[0]; - return $self->res_keep_alive(undef, 1); -} - -# answers the question: "is the backend expected to stay open?" this -# is a combination of the request we sent to it and the response they -# sent... - -# FIXME: this is called too often (especially with service selector), -# and should be redesigned to be simpler, and/or cached on the -# connection. there's too much duplication with req_keep_alive. -sub res_keep_alive { - my Perlbal::HTTPHeaders $self = $_[0]; - my Perlbal::HTTPHeaders $req = $_[1]; - my $is_options = $_[2]; - Carp::confess("ASSERT: No request headers given") unless $req || $is_options; - - # get the connection header now (saves warnings later) - my $conn = lc ($self->header('Connection') || ''); - - # if they said Connection: close, it's always not keep-alive - return 0 if $conn =~ /\bclose\b/i; - - # handle the http 1.0/0.9 case which requires keep-alive specified - if ($self->version_number < 1001) { - # must specify keep-alive, and must have a content length OR - # the request must be a head request - return 1 if - $conn =~ /\bkeep-alive\b/i && - ($is_options || - defined $self->header('Content-length') || - $req->request_method eq 'HEAD' || - $self->response_code == 304 || # not modified - $self->response_code == 204 - ); # no content - - return 0; - } - - # HTTP/1.1 case. defaults to keep-alive, per spec, unless - # asked for otherwise (checked above) - # FIXME: make sure we handle a HTTP/1.1 response from backend - # with connection: close, no content-length, going to a - # HTTP/1.1 persistent client. we'll have to add chunk markers. - # (not here, obviously) - return 1; -} - -# returns (status, range_start, range_end) when given a size -# status = 200 - invalid or non-existent range header. serve normally. -# status = 206 - parseable range is good. serve partial content. 
-# status = 416 - Range is unsatisfiable -sub range { - my Perlbal::HTTPHeaders $self = $_[0]; - my $size = $_[1]; - - my $not_satisfiable; - my $range = $self->header("Range"); - - return 200 unless - $range && - defined $size && - $range =~ /^bytes=(\d*)-(\d*)$/; - - my ($range_start, $range_end) = ($1, $2); - - undef $range_start if $range_start eq ''; - undef $range_end if $range_end eq ''; - return 200 unless defined($range_start) or defined($range_end); - - if (defined($range_start) and defined($range_end) and $range_start > $range_end) { - return 416; - } elsif (not defined($range_start) and defined($range_end) and $range_end == 0) { - return 416; - } elsif (defined($range_start) and $size <= $range_start) { - return 416; - } - - $range_start = 0 unless defined($range_start); - $range_end = $size - 1 unless defined($range_end) and $range_end < $size; - - return (206, $range_start, $range_end); -} - - -sub DESTROY { - my Perlbal::HTTPHeaders $self = shift; - Perlbal::objdtor($self, $self->{type}); -} - -1; - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/ManageCommand.pm b/lib/mogdeps/Perlbal/ManageCommand.pm deleted file mode 100644 index 21531eb0..00000000 --- a/lib/mogdeps/Perlbal/ManageCommand.pm +++ /dev/null @@ -1,104 +0,0 @@ -# class representing a one-liner management command. all the responses -# to a command should be done through this instance (out, err, ok, etc) -# -# Copyright 2005-2007, Six Apart, Ltd. 
-# - -package Perlbal::ManageCommand; -use strict; -use warnings; -no warnings qw(deprecated); - -use fields ( - 'base', # the base command name (like "proc") - 'cmd', - 'ok', - 'err', - 'out', - 'orig', - 'argn', - 'ctx', - ); - -sub new { - my ($class, $base, $cmd, $out, $ok, $err, $orig, $ctx) = @_; - my $self = fields::new($class); - - $self->{base} = $base; - $self->{cmd} = $cmd; - $self->{ok} = $ok; - $self->{err} = $err; - $self->{out} = $out; - $self->{orig} = $orig; - $self->{ctx} = $ctx; - $self->{argn} = []; - return $self; -} - -# returns an managecommand object for functions that need one, but -# this does nothing but explode if there any problems. -sub loud_crasher { - use Carp qw(confess); - __PACKAGE__->new(undef, undef, sub {}, sub {}, sub { confess "MC:err: @_" }, "", Perlbal::CommandContext->new); -} - -sub out { my $mc = shift; return @_ ? $mc->{out}->(@_) : $mc->{out}; } -sub ok { my $mc = shift; return $mc->{ok}->(@_); } - -sub err { - my ($mc, $err) = @_; - $err =~ s/\n$//; - $mc->{err}->($err); -} - -sub cmd { my $mc = shift; return $mc->{cmd}; } -sub orig { my $mc = shift; return $mc->{orig}; } -sub end { my $mc = shift; $mc->{out}->("."); 1; } - -sub parse { - my $mc = shift; - my $regexp = shift; - my $usage = shift; - - my @ret = ($mc->{cmd} =~ /$regexp/); - $mc->parse_error($usage) unless @ret; - - my $i = 0; - foreach (@ret) { - $mc->{argn}[$i++] = $_; - } - return $mc; -} - -sub arg { - my $mc = shift; - my $n = shift; # 1-based array, to correspond with $1, $2, $3 - return $mc->{argn}[$n - 1]; -} - -sub args { - my $mc = shift; - return @{$mc->{argn}}; -} - -sub parse_error { - my $mc = shift; - my $usage = shift; - $usage .= "\n" if $usage && $usage !~ /\n$/; - die $usage || "Invalid syntax to '$mc->{base}' command\n" -} - -sub no_opts { - my $mc = shift; - die "The '$mc->{base}' command takes no arguments\n" - unless $mc->{cmd} eq $mc->{base}; - return $mc; -} - -1; - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# 
indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/Plugin/AccessControl.pm b/lib/mogdeps/Perlbal/Plugin/AccessControl.pm deleted file mode 100644 index a13ad64f..00000000 --- a/lib/mogdeps/Perlbal/Plugin/AccessControl.pm +++ /dev/null @@ -1,201 +0,0 @@ -package Perlbal::Plugin::AccessControl; - -use Perlbal; -use strict; -use warnings; -no warnings qw(deprecated); - -# commands like: -# -# what to do if we fall off the rule chain: -# ACCESS POLICY {ALLOW,DENY} -# -# adding things to the rule chain. processing stops once any rule is matched. -# -# ACCESS {ALLOW,DENY} netmask 127.0.0.1/8 -# ACCESS {ALLOW,DENY} ip 127.0.0.1 -# also can make a match set the request to go into the low-priority perlbal queue: -# ACCESS QUEUE_LOW ip 127.0.0.1 - -# reset the rule chain and policy: (policy is allow by default) -# ACCESS RESET - -# Future: -# access {allow,deny} forwarded_ip 127.0.0.1 -# access {allow,deny} method [,]* -# access {allow,deny} forwarded_netmask 127.0.0.1/24 - -sub load { - my $class = shift; - - Perlbal::register_global_hook('manage_command.access', sub { - my $mc = shift->parse(qr/^access\s+ - (policy|allow|deny|reset|queue_low) # cmd - (?:\s+(\S+))? # arg1 - (?:\s+(\S+))? 
# optional arg2 - $/x, - "usage: ACCESS []"); - my ($cmd, $arg1, $arg2) = $mc->args; - - my $svcname; - unless ($svcname ||= $mc->{ctx}{last_created}) { - return $mc->err("No service name in context from CREATE SERVICE or USE "); - } - - my $ss = Perlbal->service($svcname); - return $mc->err("Non-existent service '$svcname'") unless $ss; - - my $cfg = $ss->{extra_config}->{_access} ||= {}; - - if ($cmd eq "reset") { - $ss->{extra_config}->{_access} = {}; - return $mc->ok; - } - - if ($cmd eq "policy") { - return $mc->err("policy must be 'allow' or 'deny'") unless - $arg1 =~ /^allow|deny$/; - $cfg->{deny_default} = $arg1 eq "deny"; - return $mc->ok; - } - - if ($cmd eq "allow" || $cmd eq "deny" || $cmd eq "queue_low") { - my ($what, $val) = ($arg1, $arg2); - return $mc->err("Unknown item to $cmd: '$what'") unless - $what && ($what eq "ip" || $what eq "netmask"); - - if ($what eq "netmask") { - return $mc->err("Net::Netmask not installed") - unless eval { require Net::Netmask; 1; }; - - $val = eval { Net::Netmask->new2($val) }; - return $mc->err("Error parsing netmask") unless $val; - } - - my $rules = $cfg->{rules} ||= []; - push @$rules, [ $cmd, $what, $val ]; - return $mc->ok; - } - - return $mc->err("can't get here"); - }); - - return 1; -} - -# unload our global commands, clear our service object -sub unload { - my $class = shift; - Perlbal::unregister_global_hook('manage_command.access'); - return 1; -} - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - - my $use_observed_ip; - - $svc->register_hook('AccessControl', 'start_http_request', sub { - my Perlbal::ClientHTTPBase $client = shift; - my Perlbal::HTTPHeaders $hds = $client->{req_headers}; - return 0 unless $hds; - my $uri = $hds->request_uri; - - my $allow = sub { 0; }; - my $deny = sub { - $client->send_response(403, "Access denied."); - return 1; - }; - - my $queue_low = sub { - $client->set_queue_low; - return 0; - }; - - my $rule_action = sub { - my $rule = 
shift; - if ($rule->[0] eq "deny") { - return $deny->(); - } elsif ($rule->[0] eq "allow") { - return $allow->(); - } elsif ($rule->[0] eq "queue_low") { - return $queue_low->(); - } - }; - - my $match = sub { - my $rule = shift; - if ($rule->[1] eq "ip") { - my $peer_ip; - $peer_ip = $client->observed_ip_string if $use_observed_ip; - $peer_ip ||= $client->peer_ip_string; - - return $peer_ip eq $rule->[2]; - } - - if ($rule->[1] eq "netmask") { - my $peer_ip; - $peer_ip = $client->observed_ip_string if $use_observed_ip; - $peer_ip ||= $client->peer_ip_string; - - return eval { $rule->[2]->match($peer_ip); }; - } - }; - - my $cfg = $svc->{extra_config}->{_access} ||= {}; - my $rules = $cfg->{rules} || []; - foreach my $rule (@$rules) { - next unless $match->($rule); - return $rule_action->($rule) - } - - return $deny->() if $cfg->{deny_default}; - return $allow->(); - }); - - # Allow AccessControl users to specify that they would like to use the observed IP as - # opposed to the real IP for ACL checking. - $svc->register_setter('AccessControl', 'use_observed_ip', sub { - my ($out, $what, $val) = @_; - return 0 unless $what; - - $use_observed_ip = $val; - - $out->("OK") if $out; - - return 1; - }); - - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - return 1; -} - -sub dumpconfig { - my ($class, $svc) = @_; - - my @return; - - my $cfg = $svc->{extra_config}->{_access} ||= {}; - my $rules = $cfg->{rules} || []; - - foreach my $rule (@$rules) { - my $action = uc $rule->[0]; - my $type = uc $rule->[1]; - my $value = $rule->[2]; - push @return, "ACCESS $action $type $value"; - } - - my $default_action = $cfg->{deny_default} ? 
"DENY" : "ALLOW"; - push @return, "ACCESS POLICY $default_action"; - - return @return; -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/AtomInject.pm b/lib/mogdeps/Perlbal/Plugin/AtomInject.pm deleted file mode 100644 index c5bc1aa9..00000000 --- a/lib/mogdeps/Perlbal/Plugin/AtomInject.pm +++ /dev/null @@ -1,65 +0,0 @@ -package Perlbal::Plugin::AtomInject; - -use Perlbal; -use strict; -use warnings; - -our @subs; # subscribers - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - - $svc->{enable_put} = 1; - - $svc->register_hook('AtomInject', 'handle_put', sub { - my Perlbal::ClientHTTP $self = shift; - my Perlbal::HTTPHeaders $hds = $self->{req_headers}; - return 0 unless $hds; - - return $self->send_response(400, "Invalid method") - unless $hds->request_method eq "PUT"; - - my $uri = $hds->request_uri; - return $self->send_response(400, "Invalid uri") unless $uri =~ /^\//; - $self->{scratch}{path} = $uri; - - # now abort the normal handle_put processing... 
- return 1; - }); - - $svc->register_hook('AtomInject', 'put_writeout', sub { - my Perlbal::ClientHTTP $self = shift; - return 1 if $self->{content_length_remain}; - - my $data = join("", map { $$_ } @{$self->{read_buf}}); - - # reset our input buffer - $self->{read_buf} = []; - $self->{read_ahead} = 0; - - my $rv = eval { Perlbal::Plugin::AtomStream->InjectFeed(\$data, $self->{scratch}{path}); }; - return $self->send_response(200); - }); - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - return 1; -} - -# called when we are loaded -sub load { - return 1; -} - -# called for a global unload -sub unload { - return 1; -} - - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/AtomStream.pm b/lib/mogdeps/Perlbal/Plugin/AtomStream.pm deleted file mode 100644 index 5c45b9d7..00000000 --- a/lib/mogdeps/Perlbal/Plugin/AtomStream.pm +++ /dev/null @@ -1,138 +0,0 @@ -package Perlbal::Plugin::AtomStream; - -use URI; - -use Perlbal; -use strict; -use warnings; - -our @subs; # subscribers -our @recent; # recent items in format [$epoch, $atom_ref, $path_segments_arrayref] - -our $last_timestamp = 0; - -use constant MAX_LAG => 262144; - -sub InjectFeed { - my $class = shift; - my ($atomref, $path) = @_; - - # maintain queue of last 60 seconds worth of posts - my $now = time(); - my @put_segments = URI->new($path)->path_segments; - push @recent, [ $now, $atomref, \@put_segments ]; - shift @recent while @recent && $recent[0][0] <= $now - 60; - - emit_timestamp($now) if $now > $last_timestamp; - - my $need_clean = 0; - foreach my $s (@subs) { - if ($s->{closed}) { - $need_clean = 1; - next; - } - - next unless filter(\@put_segments, $s->{scratch}{get_segments}); - - my $lag = $s->{write_buf_size}; - - if ($lag > MAX_LAG) { - $s->{scratch}{skipped_atom}++; - } else { - if (my $skip_count = $s->{scratch}{skipped_atom}) { - $s->{scratch}{skipped_atom} = 0; - $s->write(\ "\n"); - } - $s->watch_write(0) if $s->write($atomref); - } - 
} - - if ($need_clean) { - @subs = grep { ! $_->{closed} } @subs; - } -} - -sub emit_timestamp { - my $time = shift; - $last_timestamp = $time; - foreach my $s (@subs) { - next if $s->{closed}; - $s->{alive_time} = $time; - $s->write(\ "\n"); - } -} - -sub filter { - my ($put, $get) = @_; - return 0 if scalar @$put < scalar @$get; - for( my $i = 0 ; $i < scalar @$get ; $i++) { - return 0 if $put->[$i] ne $get->[$i]; - } - return 1; -} - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - - Perlbal::Socket::register_callback(1, sub { - my $now = time(); - emit_timestamp($now) if $now > $last_timestamp; - return 1; - }); - - $svc->register_hook('AtomStream', 'start_http_request', sub { - my Perlbal::ClientProxy $self = shift; - my Perlbal::HTTPHeaders $hds = $self->{req_headers}; - return 0 unless $hds; - my $uri = URI->new($hds->request_uri); - my @get_segments = $uri->path_segments; - $self->{scratch}{get_segments} = \@get_segments; - return 0 unless pop @get_segments eq 'atom-stream.xml'; - my %params = $uri->query_form; - my $since = $params{since} =~ /\d+/ ? 
$params{since} : 0; - - my $res = $self->{res_headers} = Perlbal::HTTPHeaders->new_response(200); - $res->header("Content-Type", "text/xml"); - $res->header('Connection', 'close'); - - push @subs, $self; - - $self->write($res->to_string_ref); - - my $last_rv = $self->write(\ "\n\n"); - - # if they'd like a playback, give them all items >= time requested - if ($since) { - foreach my $item (@recent) { - next if $item->[0] < $since; - next unless filter($item->[2], \@get_segments); - $last_rv = $self->write($item->[1]); - } - } - - $self->watch_write(0) if $last_rv; - return 1; - }); - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - - return 1; -} - -# called when we are loaded -sub load { - return 1; -} - -# called for a global unload -sub unload { - return 1; -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/AutoRemoveLeadingDir.pm b/lib/mogdeps/Perlbal/Plugin/AutoRemoveLeadingDir.pm deleted file mode 100644 index 2d844227..00000000 --- a/lib/mogdeps/Perlbal/Plugin/AutoRemoveLeadingDir.pm +++ /dev/null @@ -1,51 +0,0 @@ -package Perlbal::Plugin::AutoRemoveLeadingDir; - -# -# this plugin auto-removes a leading directory path component -# in the URL, if it's the name of the directory the webserver -# is rooted at. -# -# if docroot = /home/lj/htdocs/stc/ -# -# and user requests: -# -# /stc/img/foo.jpg -# -# Then this plugin will treat that as if it's a request for /img/foo.jpg. -# -# This is useful for css/js/etc to have an "absolute" pathname for -# peer resources (think css having url(/stc/foo.jpg)) that can be served -# from either a separate hostname (stat.livejournal.com) and using a CDN, -# or from www. when cross-domain js restrictions require it. 
- -use Perlbal; -use strict; -use warnings; - -sub load { 1 } -sub unload { 1 } - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - - $svc->register_hook('AutoRemoveLeadingDir', 'start_serve_request', sub { - my Perlbal::ClientHTTPBase $client = shift; - my $uriref = shift; - - my Perlbal::Service $svc = $client->{service}; - my ($tail) = ($svc->{docroot} =~ m!/([\w-]+)/?$!); - $$uriref =~ s!^/$tail!! if $tail; - return 0; - }); - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - return 1; -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/Cgilike.pm b/lib/mogdeps/Perlbal/Plugin/Cgilike.pm deleted file mode 100644 index e841ff9f..00000000 --- a/lib/mogdeps/Perlbal/Plugin/Cgilike.pm +++ /dev/null @@ -1,343 +0,0 @@ -#!/usr/bin/perl -# -# Copyright 2007 Martin Atkins and Six Apart Ltd. -# - -=head1 NAME - -Perlbal::Plugin::Cgilike - Handle Perlbal requests with a Perl subroutine - -=head1 DESCRIPTION - -This module allows responses to be handled with a simple API that's similar in principle to -CGI, mod_perl response handlers, etc. - -It does not, however, come anywhere close to conforming to the CGI "standard". It's actually -more like mod_perl in usage, though there are several differences. -Most notably, Perlbal is single-process and single-threaded, and handlers run inside the Perlbal -process and must therefore return quickly and not do any blocking operations. - -As it currently stands, this is very bare-bones and has only really been used with basic GET -requests. It lacks a nice API for handling the body of a POST or PUT request. - -It is not recommended to use this for extensive applications. Perlbal is first and foremost -a load balancer, so if you're doing something at all complicated you're probably better off -using something like Apache mod_perl and then putting Perlbal in front if it if necessary. 
-However, this plugin may prove useful for simple handlers or perhaps embedding a simple -HTTP service into another application that uses C. - -=head1 SYNOPSIS - -This module provides a Perlbal plugin which can be loaded and used as follows. - - LOAD cgilike - PERLREQUIRE = MyPackage - - CREATE SERVICE cgilike - SET role = web_server - SET listen = 127.0.0.1:80 - SET plugins = cgilike - PERLHANDLER = MyPackage::handler - ENABLE cgilike - -With this plugin loaded into a particular service, the plugin will then be called for -all requests for that service. - -Set cgilike.handler to the name of a subroutine that will handle requests. This subroutine -will receive an object which allows interaction with the Perlbal service. - - package MyPackage - sub handler { - my ($r) = @_; - if ($r->uri eq '/') { - print "

Hello, world

"; - return Perlbal::Plugin::Cgilike::HANDLED; - } - else { - return 404; - } - } - -Return C to indicate that the request has been handled, or return some HTTP error code -to produce a predefined error message. -You may also return C if you do not wish to handle the request, in which case Perlbal -will be allowed to handle the request in whatever way it would have done without Cgilike loaded. - -If your handler returns any non-success value, it B produce any output. If you -produce output before returning such a value, the response to the client is likely to be -utter nonsense. - -You may also return C, which is equivalent to -returning zero except that the HTTP connection will be left open once you return. It is -your responsibility to later call C<$r-Eend_response()> when you have completed -the response. This style is necessary when you need to perform some long operation -before you can return a response; you'll need to use some appropriate method to set -a callback to run when the operation completes and then do your response in the -callback. Once you've called C, you must not call any further methods on C<$r>; -it's probably safest to just return immediately afterwards to avoid any mishaps. - -=head1 API DOCUMENTATION - -TODO: Write this - -=head1 TODO - -Currently there is no API for dealing with the body of a POST or PUT request. Ideally it'd be able -to do automatic decoding of application/x-www-form-urlencoded data, too. - -The POSTPONE_RESPONSE functionality has not been tested extensively and is probably buggy. - -=head1 COPYRIGHT AND LICENSE - -Copyright 2007 Martin Atkins and Six Apart Ltd. - -This module is part of the Perlbal distribution, and as such can be distributed under the same licence terms as the rest of Perlbal. 
- -=cut - -package Perlbal::Plugin::Cgilike; - -use Perlbal; -use strict; -use Symbol; - -use constant DECLINED => -2; -use constant HANDLED => 0; -use constant POSTPONE_RESPONSE => -1; - -sub register { - my ($class, $svc) = @_; - - $svc->register_hook('Cgilike', 'start_http_request', sub { Perlbal::Plugin::Cgilike::handle_request($svc, $_[0]); }); - -} - -sub handle_request { - my Perlbal::Service $svc = shift; - my Perlbal::ClientProxy $pb = shift; - return 0 unless $pb->{req_headers}; - - # Create a new request object, and tie a filehandle to it - my $output_handle = Symbol::gensym(); - my $req = tie(*{$output_handle}, 'Perlbal::Plugin::Cgilike::Request', $pb); - - my $handler = $svc->{extra_config}->{_perlhandler}; - if (! defined($handler)) { - return $pb->send_response(500, "No perlhandler is configured for this service"); - } - - # Our $output_handle is tied to the request object, which provides PRINT and PRINTF methods - # Set it as the default so that handlers can just use print and printf as normal. - my $oldfh = select($output_handle); - - my $ret; - my $result = eval { - no strict; - $ret = &{$handler}($req); - 1; - }; - - # Restore the old filehandle to avoid breaking anyone else - select($oldfh); - - if ($result) { - if ($ret == 0 || $ret == POSTPONE_RESPONSE) { - if ($ret == 0) { - $req->end_response(); - untie($req); - } - return 1; - } - elsif ($ret == DECLINED) { - return 0; - } - else { - return $pb->send_response($ret+0, $ret+0); - } - } - else { - return $pb->send_response(500, "Error in handler: ".$@); - } - - return $pb->send_response(500, "I seem to have fallen into a place I shouldn't be."); - -} - -sub handle_perlrequire_command { - # This is defined with an equals because Perlbal lowercases all manage commands except - # after an equals, which means that having an equals here is the only way to actually - # get the correct case of the module name. Lame++. 
- my $mc = shift->parse(qr/^perlrequire\s*=\s*([\w:]+)$/, "usage: PERLREQUIRE="); - my ($module) = $mc->args; - - my $success = eval "require $module; 1;"; - - unless ($success) { - return $mc->err("Failed to load $module: $@") - } - - return 1; -} - -sub handle_perlhandler_command { - my $mc = shift->parse(qr/^perlhandler\s*=\s*([\w:]+)$/, "usage: PERLHANDLER="); - my ($subname) = $mc->args; - - my $svcname; - unless ($svcname ||= $mc->{ctx}{last_created}) { - return $mc->err("No service name in context from CREATE SERVICE or USE "); - } - - my $svc = Perlbal->service($svcname); - return $mc->err("Non-existent service '$svcname'") unless $svc; - - my $cfg = $svc->{extra_config}->{_perlhandler} = $subname; - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - - $svc->unregister_hooks('Cgilike'); - return 1; -} - -# called when we are loaded -sub load { - Perlbal::register_global_hook('manage_command.perlrequire', \&Perlbal::Plugin::Cgilike::handle_perlrequire_command); - Perlbal::register_global_hook('manage_command.perlhandler', \&Perlbal::Plugin::Cgilike::handle_perlhandler_command); - - return 1; -} - -# called for a global unload -sub unload { - return 1; -} - -package Perlbal::Plugin::Cgilike::Request; - -use URI; - -sub new { - my ($class, $pb) = @_; - - return bless { - pb => $pb, - header_sent => 0, - }, $class; -} - -# This class can also provide a tied handle which supports PRINT and PRINTF (but not much else) -sub TIEHANDLE { - my ($class, $req_headers) = @_; - return $class->new($req_headers); -} - -sub request_header { - return $_[0]->{pb}->{req_headers}; -} - -sub response_header { - my ($self, $k, $v) = @_; - - if (defined($k)) { - my $hdrs = $self->response_header; - if (defined($v)) { - $hdrs->header($k => $v); - return $v; - } - else { - return $hdrs->header($k); - } - } - else { - if (defined($self->{response_headers})) { - return $self->{response_headers}; - } - else { - return 
$self->{response_headers} = Perlbal::HTTPHeaders->new_response(200); - } - } -} - -sub response_status_code { - my ($self, $value) = @_; - - my $res = $self->response_header; - if (defined($value)) { - $res->code($value); - } - - return $res->response_code; -} - -sub uri { - my ($self) = @_; - return $self->{uri} ? $self->{uri} : $self->{uri} = URI->new($self->request_header->request_uri); -} - -sub path { - my ($self) = @_; - return $self->uri->path; -} - -sub path_segments { - my ($self) = @_; - my @segments = $self->uri->path_segments; - shift @segments; # Get rid of the empty segment at the start - return @segments; -} - -sub query_string { - my ($self) = @_; - return $self->uri->query; -} - -sub query_args { - my ($self) = @_; - return $self->uri->query_form; -} - -sub method { - my ($self) = @_; - return $self->request_header->request_method; -} - -sub send_response_header { - my ($self) = @_; - $self->response_header('Content-type' => 'text/html') unless $self->response_header('Content-type'); - $self->{pb}->write($self->response_header->to_string_ref); - $self->{header_sent} = 1; -} - -sub response_header_sent { - return $_[0]->{header_sent} ? 1 : 0; -} - -sub PRINT { - my ($self, @stuff) = @_; - $self->print(@stuff); -} - -sub PRINTF { - my ($self, $format, @stuff) = @_; - $self->print(sprintf($format, @stuff)); -} - -sub print { - my ($self, @stuff) = @_; - if (! 
$self->response_header_sent) { - $self->send_response_header(); - } - $self->{pb}->write(join("", @stuff)); -} - -sub end_response { - my ($self) = @_; - $self->{pb}->write(sub { $self->{pb}->http_response_sent; }); -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/EchoService.pm b/lib/mogdeps/Perlbal/Plugin/EchoService.pm deleted file mode 100644 index 3f6f1f7a..00000000 --- a/lib/mogdeps/Perlbal/Plugin/EchoService.pm +++ /dev/null @@ -1,123 +0,0 @@ -########################################################################### -# simple plugin demonstrating how to create an add-on service for Perlbal -# using the plugin infrastructure -########################################################################### - -package Perlbal::Plugin::EchoService; - -use strict; -use warnings; - -# on load we need to define the service and any parameters we want -sub load { - - # define the echo service, which instantiates this type of object - Perlbal::Service::add_role( - echo => \&Perlbal::Plugin::EchoService::Client::new, - ); - - # add up custom configuration options that people are allowed to set on the echo_service - Perlbal::Service::add_tunable( - # allow the following: - # SET myservice.echo_delay = 5 - # defines how long to wait between getting text and echoing it back - echo_delay => { - des => "Time in seconds to pause before sending text back using the echo service.", - default => 0, # no delay - check_role => "echo", - check_type => "int", - } - ); - - return 1; -} - -# remove the various things we've hooked into, this is required as a way of -# being good to the system... -sub unload { - Perlbal::Service::remove_tunable('echo_delay'); - Perlbal::Service::remove_role('echo'); - return 1; -} - - -########################################################################### -# this is the implementation of the client that gets instantiated by the -# service. 
(which is really all a service does - instantiate the right -# type of client, and store some information) -########################################################################### - -package Perlbal::Plugin::EchoService::Client; -use strict; -use warnings; - -use base "Perlbal::Socket"; -use fields ('service', # the service we're from - 'buf'); # the buffer of what we're reading - -# create a new object of this class -sub new { - my $class = "Perlbal::Plugin::EchoService::Client"; - my ($service, $sock) = @_; - my $self = fields::new($class); - $self->SUPER::new($sock); - $self->{service} = $service; - $self->{buf} = ""; # what we've read so far, not forming a complete line - - $self->watch_read(1); - return $self; -} - -# called when we are readable - i.e. there is data available -sub event_read { - my Perlbal::Plugin::EchoService::Client $self = shift; - - # try to read in 1k of data, remember to close if you get undef, as that means - # something went wrong, or the socket was closed - my $bref = $self->read(1024); - return $self->close() unless defined $bref; - $self->{buf} .= $$bref; - - # now, parse out any lines that we have gotten. this just removes data line by - # line so we can handle it. - while ($self->{buf} =~ s/^(.+?)\r?\n//) { - my $line = $1; - - # package up a sub to do what we want. this is in a coderef because we either - # need to call it now or schedule it for later. saves some duplication. - my $do_echo = sub { $self->write("$line\r\n"); }; - - # if they want a delay, we have to schedule this for later - if (my $delay = $self->{service}->{extra_config}->{echo_delay}) { - # schedule - Danga::Socket->AddTimer($delay, $do_echo); - - } else { - # immediately, so run it - $do_echo->(); - - } - } -} - -# called when we are writeable - that is, we are allowed to write data now. try to -# flush any existing data and then if we have nothing in the write buffer left, -# go ahead and stop notifying us about writeability. 
-sub event_write { - my Perlbal::Plugin::EchoService::Client $self = shift; - $self->watch_write(0) if $self->write(undef); -} - -# if we run into some socket error, just close -sub event_err { - my Perlbal::Plugin::EchoService::Client $self = shift; - $self->close; -} - -# same thing if we get a hup -sub event_hup { - my Perlbal::Plugin::EchoService::Client $self = shift; - $self->close; -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/Highpri.pm b/lib/mogdeps/Perlbal/Plugin/Highpri.pm deleted file mode 100644 index 0c269681..00000000 --- a/lib/mogdeps/Perlbal/Plugin/Highpri.pm +++ /dev/null @@ -1,125 +0,0 @@ -########################################################################### -# plugin that makes some requests high priority. this is very LiveJournal -# specific, as this makes requests to the client protocol be treated as -# high priority requests. -########################################################################### - -package Perlbal::Plugin::Highpri; - -use strict; -use warnings; - -# keep track of services we're loaded for -our %Services; - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - - # create a compiled regexp for very frequent use later - my $uri_check = qr{^(?:/interface/(?:xmlrpc|flat)|/login\.bml)$}; - my $host_check = undef; - - # setup default extra config info - $svc->{extra_config}->{highpri_uri_check_str} = '^(?:/interface/(?:xmlrpc|flat)|/login\.bml)$'; - $svc->{extra_config}->{highpri_host_check_str} = 'undef'; - - # config setter reference - my $config_set = sub { - my ($out, $what, $val) = @_; - return 0 unless $what && $val; - - # setup an error sub - my $err = sub { - $out->("ERROR: $_[0]") if $out; - return 0; - }; - - # if they said undef, that's not a regexp, that means use none - my $temp; - unless ($val eq 'undef' || $val eq 'none' || $val eq 'null') { - # verify this regexp works? 
do it in an eval because qr will die - # if we give it something invalid - eval { - $temp = qr{$val}; - }; - return $err->("Invalid regular expression") if $@ || !$temp; - } - - # see what they want to set and set it - if ($what =~ /^uri_pattern/i) { - $uri_check = $temp; - $svc->{extra_config}->{highpri_uri_check_str} = $val; - } elsif ($what =~ /^host_pattern/i) { - $host_check = $temp; - $svc->{extra_config}->{highpri_host_check_str} = $val; - } else { - return $err->("Plugin understands: uri_pattern, host_pattern"); - } - - # 1 for success! - return 1; - }; - - # register things to take in configuration regular expressions - $svc->register_setter('Highpri', 'uri_pattern', $config_set); - $svc->register_setter('Highpri', 'host_pattern', $config_set); - - # more complicated statistics - $svc->register_hook('Highpri', 'make_high_priority', sub { - my Perlbal::ClientProxy $cp = shift; - - # check it against our compiled regexp - return 1 if $uri_check && - $cp->{req_headers}->request_uri =~ /$uri_check/; - if ($host_check) { - my $hostname = $cp->{req_headers}->header('Host'); - return 1 if $hostname && $hostname =~ /$host_check/; - } - - # doesn't fit, so return 0 - return 0; - }); - - # mark this service as being active in this plugin - $Services{"$svc"} = $svc; - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - - # clean up time - $svc->unregister_hooks('Highpri'); - $svc->unregister_setters('Highpri'); - return 1; -} - -# load global commands for querying this plugin on what's up -sub load { - # setup a command to see what the patterns are - Perlbal::register_global_hook('manage_command.patterns', sub { - my @res = ("High priority pattern buffer:"); - - foreach my $svc (values %Services) { - push @res, "SET $svc->{name}.highpri.uri_pattern = $svc->{extra_config}->{highpri_uri_check_str}"; - push @res, "SET $svc->{name}.highpri.host_pattern = $svc->{extra_config}->{highpri_host_check_str}"; - } - - 
push @res, "."; - return \@res; - }); - - return 1; -} - -# unload our global commands, clear our service object -sub unload { - Perlbal::unregister_global_hook('manage_command.patterns'); - %Services = (); - return 1; -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/Include.pm b/lib/mogdeps/Perlbal/Plugin/Include.pm deleted file mode 100644 index 09499e26..00000000 --- a/lib/mogdeps/Perlbal/Plugin/Include.pm +++ /dev/null @@ -1,90 +0,0 @@ -=head1 NAME - -Perlbal::Plugin::Include - Allows multiple, nesting configuration files - -=head1 DESCRIPTION - -This module adds an INCLUDE command to the Perlbal management console -and allows the globbed inclusion of configuration files. - -=head1 SYNOPSIS - -This module provides a Perlbal plugin which can be loaded and used as -follows: - - LOAD include - INCLUDE = /etc/perlbal/my.conf - -You may also specify multiple configuration files a la File::Glob: - - INCLUDE = /foo/bar.conf /foo/quux/*.conf - -=head1 BUGS AND LIMITATIONS - -This module relies entirely on Perlbal::load_config for loading, so if -you have trouble with INCLUDE, be sure you can load the same -configuration without error using "perlbal -c" first. - -Also note that Perlbal::load_config versions 1.60 and below do not use -a local filehandle while reading the configuration file, so this -module overrides that routine on load to allow nested calls. - -=head1 COPYRIGHT AND LICENSE - -Copyright 2008 Eamon Daly - -This module is part of the Perlbal distribution, and as such can be -distributed under the same licence terms as the rest of Perlbal. 
- -=cut - -package Perlbal::Plugin::Include; - -use strict; -use warnings; -no warnings qw(deprecated); - -# called when we are loaded -sub load { - my $class = shift; - - Perlbal::register_global_hook('manage_command.include', sub { - my $mc = shift->parse(qr/^include\s+=\s+(.+)\s*$/, - "usage: INCLUDE = "); - - my ($glob) = $mc->args; - - for (glob($glob)) { - Perlbal::load_config($_, sub { print STDOUT "$_[0]\n"; }); - } - - return $mc->ok; - }); - - return 1; -} - -# called for a global unload -sub unload { - # unregister our global hooks - Perlbal::unregister_global_hook('manage_command.include'); - - return 1; -} - -# In older versions of Perlbal, load_config uses a typeglob, throwing -# warnings when re-entering. This uses a locally-scoped filehandle. -sub load_config_local { - my ($file, $writer) = @_; - open(my $fh, $file) or die "Error opening config file ($file): $!\n"; - my $ctx = Perlbal::CommandContext->new; - $ctx->verbose(0); - while (my $line = <$fh>) { - $line =~ s/\$(\w+)/$ENV{$1}/g; - return 0 unless Perlbal::run_manage_command($line, $writer, $ctx); - } - close($fh); - return 1; -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/LazyCDN.pm b/lib/mogdeps/Perlbal/Plugin/LazyCDN.pm deleted file mode 100644 index c6814cf4..00000000 --- a/lib/mogdeps/Perlbal/Plugin/LazyCDN.pm +++ /dev/null @@ -1,103 +0,0 @@ -package Perlbal::Plugin::LazyCDN; - -use IO::Socket::INET; -use Perlbal; -use Perlbal::ClientHTTPBase; -use strict; -use warnings; - -sub load { - # add up custom configuration options that people are allowed to set - Perlbal::Service::add_tunable( - # allow the following: - # SET myservice.fallback_service = proxy - fallback_service => { - des => "Service name to fall back to when a static get or concat get requests something newer than on disk.", - check_role => "web_server", - } - ); - - Perlbal::Service::add_tunable( - # allow the following: - # SET myservice.fallback_udp_ping_addr = 5 - fallback_udp_ping_addr => { - des => "Address and port 
to send UDP packets containing URL requests .", - check_role => "web_server", - check_type => ["regexp", qr/^\d+\.\d+\.\d+\.\d+:\d+$/, "Expecting IP:port of form a.b.c.d:port."], - } - ); - return 1; -} - -# remove the various things we've hooked into, this is required as a way of -# being good to the system... -sub unload { - Perlbal::Service::remove_tunable('fallback_service'); - Perlbal::Service::remove_tunable('fallback_udp_ping_addr'); - return 1; -} - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - - my $socket; - - my $hook = sub { - my Perlbal::ClientHTTPBase $client = shift; - my $last_modified = shift; # unix timestamp for last modified of the concatenated files - - my $fallback_service_name = $client->{service}->{extra_config}->{fallback_service}; - return unless $fallback_service_name; - - my $fallback_service = Perlbal->service($fallback_service_name); - return unless $fallback_service; - - my $req_hd = $client->{req_headers}; - - my $uri = $req_hd->request_uri; - - my ($v) = $uri =~ m/\bv=(\d+)\b/; - - if (defined $last_modified) { - return unless $v; - return 0 unless $v > $last_modified; - } - - if (my $fallback_ping_addr = $client->{service}->{extra_config}->{fallback_udp_ping_addr}) { - $socket ||= _ping_socket($fallback_ping_addr); - $socket->write($uri); - } - - $fallback_service->adopt_base_client( $client ); - - return 1; - }; - - $svc->register_hook('LazyCDN', 'static_get_poststat_pre_send', $hook); - $svc->register_hook('LazyCDN', 'concat_get_poststat_pre_send', $hook); - - $svc->register_hook('LazyCDN', 'static_get_poststat_file_missing', $hook); - $svc->register_hook('LazyCDN', 'concat_get_poststat_file_missing', $hook); - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - return 1; -} - -sub _ping_socket { - my $hostspec = shift; - my $socket = IO::Socket::INET->new( - PeerAddr => $hostspec, - Proto => 'udp', - Broadcast => 1, - 
ReuseAddr => 1) - or warn "Can't bind udp ping socket: $!\n"; - return $socket; -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/MaxContentLength.pm b/lib/mogdeps/Perlbal/Plugin/MaxContentLength.pm deleted file mode 100644 index 82c95ad5..00000000 --- a/lib/mogdeps/Perlbal/Plugin/MaxContentLength.pm +++ /dev/null @@ -1,91 +0,0 @@ -package Perlbal::Plugin::MaxContentLength; - -=head1 NAME - -Perlbal::Plugin::MaxContentLength - Reject large requests - -=head1 SYNOPSIS - - LOAD MaxContentLength - CREATE SERVICE cgilike - # define a service... - SET max_content_length = 100000 - SET plugins = MaxContentLength - ENABLE cgilike - -=head1 DESCRIPTION - -This module rejects requests that are larger than a configured limit. If a -request bears a Content-Length header whose value exceeds the -max_content_length value, the request will be rejected with a 413 "Request -Entity Too Large" error. - -=head1 AUTHOR - -Adam Thomason, Eathomason@sixapart.comE - -=head1 COPYRIGHT AND LICENSE - -Copyright 2008 Six Apart Ltd. - -This module is part of the Perlbal distribution, and as such can be distributed -under the same licence terms as the rest of Perlbal. - -=cut - -use strict; -use warnings; - -use Perlbal; - -sub load { - Perlbal::Service::add_tunable( - max_content_length => { - check_role => '*', - check_type => 'int', - des => "maximum Content-Length allowed, in bytes. 
0 for no limit", - default => 0, - }, - ); - return 1; -} - -use constant HANDLE_REQUEST => 0; -use constant IGNORE_REQUEST => 1; - -sub register { - my ($class, $svc) = @_; - - my $cfg = $svc->{extra_config}; - return unless $cfg; - - $svc->register_hook('MaxContentLength', 'start_http_request' => sub { - my $client = shift; - return IGNORE_REQUEST unless $client; - - # allow request if max is disabled - return HANDLE_REQUEST unless $cfg->{max_content_length}; - - my $headers = $client->{req_headers}; - return HANDLE_REQUEST unless $headers; - - # allow requests which don't have a Content-Length header - my $length = $headers->header('content-length'); - return HANDLE_REQUEST unless $length; - - # allow requests under the cap - return HANDLE_REQUEST if $length <= $cfg->{max_content_length}; - - $client->send_response(413, "Content too long.\n"); - return IGNORE_REQUEST; - }); -} - -sub unregister { - my ($class, $svc) = @_; - - $svc->unregister_hooks('MaxContentLength'); - return 1; -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/NotModified.pm b/lib/mogdeps/Perlbal/Plugin/NotModified.pm deleted file mode 100644 index d6d965fb..00000000 --- a/lib/mogdeps/Perlbal/Plugin/NotModified.pm +++ /dev/null @@ -1,84 +0,0 @@ -package Perlbal::Plugin::NotModified; - -use Perlbal; -use strict; -use warnings; - -# Takes settings in perlbal like: -# SET ss.notmodified.host_pattern = ^example\.com -# -# The value is a regular expression to match against the Host: header on the incoming request. 
- -sub load { - my $class = shift; - return 1; -} - -sub unload { - my $class = shift; - return 1; -} - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - - my $host_check_regex = undef; - - my $start_http_request_hook = sub { - my Perlbal::ClientHTTPBase $client = shift; - my Perlbal::HTTPHeaders $hds = $client->{req_headers}; - return 0 unless $hds; - - my $uri = $hds->request_uri; - - return 0 unless $uri; - - my $host = $hds->header("Host"); - - return 0 unless $host; - return 0 unless $host =~ $host_check_regex; - - my $ims = $hds->header("If-Modified-Since"); - - return 0 unless $ims; - - $client->send_response(304, "Not Modified"); - - return 1; - }; - - # register things to take in configuration regular expressions - $svc->register_setter('NotModified', 'host_pattern', sub { - my ($out, $what, $val) = @_; - return 0 unless $what && $val; - - my $err = sub { - $out->("ERROR: $_[0]") if $out; - return 0; - }; - - unless (length $val) { - $host_check_regex = undef; - $svc->unregister_hooks('NotModified'); - return 1; - } - - $host_check_regex = qr/$val/; - $svc->register_hook('NotModified', 'start_http_request', $start_http_request_hook); - - return 1; - }); - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - $svc->unregister_hooks('NotModified'); - $svc->unregister_setters('NotModified'); - return 1; -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/Palimg.pm b/lib/mogdeps/Perlbal/Plugin/Palimg.pm deleted file mode 100644 index 7e7abb1e..00000000 --- a/lib/mogdeps/Perlbal/Plugin/Palimg.pm +++ /dev/null @@ -1,368 +0,0 @@ -########################################################################### -# Palimg plugin that allows Perlbal to serve palette altered images -########################################################################### - -package Perlbal::Plugin::Palimg; - -use strict; -use warnings; -no warnings qw(deprecated); - -# called when we're 
being added to a service -sub register { - my ($class, $svc) = @_; - - # verify that an incoming request is a palimg request - $svc->register_hook('Palimg', 'start_serve_request', sub { - my Perlbal::ClientHTTPBase $obj = $_[0]; - return 0 unless $obj; - my Perlbal::HTTPHeaders $hd = $obj->{req_headers}; - my $uriref = $_[1]; - return 0 unless $uriref; - - # if this is palimg, peel off the requested modifications and put in headers - return 0 unless $$uriref =~ m!^/palimg/(.+)\.(\w+)(.*)$!; - my ($fn, $ext, $extra) = ($1, $2, $3); - return 0 unless $extra; - my ($palspec) = $extra =~ m!^/p(.+)$!; - return 0 unless $fn && $palspec; - - # must be ok, setup for it - $$uriref = "/palimg/$fn.$ext"; - $obj->{scratch}->{palimg} = [ $ext, $palspec ]; - return 0; - }); - - # actually serve a palimg - $svc->register_hook('Palimg', 'start_send_file', sub { - my Perlbal::ClientHTTPBase $obj = $_[0]; - return 0 unless $obj && - (my $palimginfo = $obj->{scratch}->{palimg}); - - # turn off writes - $obj->watch_write(0); - - # create filehandle for reading - my $data = ''; - Perlbal::AIO::aio_read($obj->reproxy_fh, 0, 2048, $data, sub { - # got data? 
undef is error - return $obj->_simple_response(500) unless $_[0] > 0; - - # pass down to handler - my Perlbal::HTTPHeaders $hd = $obj->{req_headers}; - my $res = PalImg::modify_file(\$data, $palimginfo->[0], $palimginfo->[1]); - return $obj->_simple_response(500) unless defined $res; - return $obj->_simple_response($res) if $res; - - # seek into the file now so sendfile starts further in - my $ld = length $data; - sysseek($obj->{reproxy_fh}, $ld, &POSIX::SEEK_SET); - $obj->{reproxy_file_offset} = $ld; - - # re-enable writes after we get data - $obj->tcp_cork(1); # by setting reproxy_file_offset above, it won't cork, so we cork it - $obj->write($data); - $obj->watch_write(1); - }); - - return 1; - }); - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - - # clean up time - $svc->unregister_hooks('Palimg'); - return 1; -} - -# called when we are loaded/unloaded ... someday add some stats viewing -# commands here? -sub load { return 1; } -sub unload { return 1; } - -####### PALIMG START ########################################################################### -package PalImg; - -sub parse_hex_color -{ - my $color = shift; - return [ map { hex(substr($color, $_, 2)) } (0,2,4) ]; -} - -sub modify_file -{ - my ($data, $type, $palspec) = @_; - - # palette altering - my %pal_colors; - if (my $pals = $palspec) { - my $hx = "[0-9a-f]"; - if ($pals =~ /^g($hx{2,2})($hx{6,6})($hx{2,2})($hx{6,6})$/) { - # gradient from index $1, color $2, to index $3, color $4 - my $from = hex($1); - my $to = hex($3); - return 404 if $from == $to; - my $fcolor = parse_hex_color($2); - my $tcolor = parse_hex_color($4); - if ($to < $from) { - ($from, $to, $fcolor, $tcolor) = - ($to, $from, $tcolor, $fcolor); - } - for (my $i=$from; $i<=$to; $i++) { - $pal_colors{$i} = [ map { - int($fcolor->[$_] + - ($tcolor->[$_] - $fcolor->[$_]) * - ($i-$from) / ($to-$from)) - } (0..2) ]; - } - } elsif ($pals =~ /^t($hx{6,6})($hx{6,6})?$/) { - # 
tint everything towards color - my ($t, $td) = ($1, $2); - $pal_colors{'tint'} = parse_hex_color($t); - $pal_colors{'tint_dark'} = $td ? parse_hex_color($td) : [0,0,0]; - } elsif (length($pals) > 42 || $pals =~ /[^0-9a-f]/) { - return 404; - } else { - my $len = length($pals); - return 404 if $len % 7; # must be multiple of 7 chars - for (my $i = 0; $i < $len/7; $i++) { - my $palindex = hex(substr($pals, $i*7, 1)); - $pal_colors{$palindex} = [ - hex(substr($pals, $i*7+1, 2)), - hex(substr($pals, $i*7+3, 2)), - hex(substr($pals, $i*7+5, 2)), - substr($pals, $i*7+1, 6), - ]; - } - } - } - - if (%pal_colors) { - if ($type eq 'gif') { - return 404 unless PaletteModify::new_gif_palette($data, \%pal_colors); - } elsif ($type eq 'png') { - return 404 unless PaletteModify::new_png_palette($data, \%pal_colors); - } - } - - # success - return 0; -} -####### PALIMG END ############################################################################# - -####### PALETTEMODIFY START #################################################################### -package PaletteModify; - -BEGIN { - $PaletteModify::HAVE_CRC = eval "use String::CRC32 (); 1;"; -} - -sub common_alter -{ - my ($palref, $table) = @_; - my $length = length $table; - - my $pal_size = $length / 3; - - # tinting image? 
if so, we're remaking the whole palette - if (my $tint = $palref->{'tint'}) { - my $dark = $palref->{'tint_dark'}; - my $diff = [ map { $tint->[$_] - $dark->[$_] } (0..2) ]; - $palref = {}; - for (my $idx=0; $idx<$pal_size; $idx++) { - for my $c (0..2) { - my $curr = ord(substr($table, $idx*3+$c)); - my $p = \$palref->{$idx}->[$c]; - $$p = int($dark->[$c] + $diff->[$c] * $curr / 255); - } - } - } - - while (my ($idx, $c) = each %$palref) { - next if $idx >= $pal_size; - substr($table, $idx*3+$_, 1) = chr($c->[$_]) for (0..2); - } - - return $table; -} - -sub new_gif_palette -{ - my ($data, $palref) = @_; - - # make sure we have data to operate on, or the substrs below die - return unless $$data; - - # 13 bytes for magic + image info (size, color depth, etc) - # and then the global palette table (3*256) - my $header = substr($$data, 0, 13+3*256); - - # figure out how big global color table is (don't want to overwrite it) - my $pf = ord substr($header, 10, 1); - my $gct = 2 ** (($pf & 7) + 1); # last 3 bits of packaged fields - - # final sanity check for size so the substr below doesn't die - return unless length $header >= 13 + 3 * $gct; - - substr($header, 13, 3*$gct) = common_alter($palref, substr($header, 13, 3*$gct)); - $$data = $header; - return 1; -} - -sub new_png_palette -{ - my ($data, $palref) = @_; - - # subroutine for reading data - my ($curidx, $maxlen) = (0, length $$data); - my $read = sub { - # put $_[1] data into scalar reference $_[0] - return undef if $_[1] + $curidx > $maxlen; - ${$_[0]} = substr($$data, $curidx, $_[1]); - $curidx += $_[1]; - return length ${$_[0]}; - }; - - # without this module, we can't proceed. 
- return 0 unless $PaletteModify::HAVE_CRC; - - my $imgdata; - - # Validate PNG signature - my $png_sig = pack("H16", "89504E470D0A1A0A"); - my $sig; - $read->(\$sig, 8); - return 0 unless $sig eq $png_sig; - $imgdata .= $sig; - - # Start reading in chunks - my ($length, $type) = (0, ''); - while ($read->(\$length, 4)) { - - $imgdata .= $length; - $length = unpack("N", $length); - return 0 unless $read->(\$type, 4) == 4; - $imgdata .= $type; - - if ($type eq 'IHDR') { - my $header; - $read->(\$header, $length+4); - my ($width,$height,$depth,$color,$compression, - $filter,$interlace, $CRC) - = unpack("NNCCCCCN", $header); - return 0 unless $color == 3; # unpaletted image - $imgdata .= $header; - } elsif ($type eq 'PLTE') { - # Finally, we can go to work - my $palettedata; - $read->(\$palettedata, $length); - $palettedata = common_alter($palref, $palettedata); - $imgdata .= $palettedata; - - # Skip old CRC - my $skip; - $read->(\$skip, 4); - - # Generate new CRC - my $crc = String::CRC32::crc32($type . $palettedata); - $crc = pack("N", $crc); - - $imgdata .= $crc; - $$data = $imgdata; - return 1; - } else { - my $skip; - # Skip rest of chunk and add to imgdata - # Number of bytes is +4 because of CRC - # - for (my $count=0; $count < $length + 4; $count++) { - $read->(\$skip, 1); - $imgdata .= $skip; - } - } - } - - return 0; -} -####### PALETTEMODIFY END ###################################################################### - -1; - -__END__ - -=head1 NAME - -Perlbal::Plugin::Palimg - plugin that allows Perlbal to serve palette altered images - -=head1 VERSION - -This documentation refers to C that ships with Perlbal 1.50 - -=head1 DESCRIPTION - -Palimg is a perlbal plugin that allows you to modify C and C on the fly. Put the images you want to be able to modify into the C directory. You modify them by adding C to the end of the url, where SPEC is one of the below defined commands (gradient, tint, etc). 
- -=head1 CONFIGURING PERLBAL - -To configure your Perlbal installation to use Palimg you'll need to C the plugin then add a service parameter to a C service to activate it. - -Example C: - - LOAD palimg - - CREATE SERVICE palex - SET listen = ${ip:eth0}:80 - SET role = web_server - SET plugins = palimg - SET docroot = /usr/share/doc/ - SET dirindexing = 0 - ENABLE palex - -=head1 GRADIENTS - -You can change the gradient of the image by adding C to the end of the url. C<00> is the index where the gradient starts and C<111111> is the color (in hex) of the beginning of the gradient. C<64> is the index of the end of the gradient and C is the color of the end of the gradient. Note that all colors specified in hex should be lowercase. - -Example: - - http://192.168.0.1/palimg/logo.gif/pg01aaaaaa99cccccc - -=head1 TINTING - -You can tint the image by adding C to the end of the url. C<000000> should be replaced with the color to tint towards. C is optional and defines the "dark" tint color. Both colors should be specified as lowercase hex numbers. - -Example: - - http://192.168.0.1/palimg/logo.gif/pt1c1c1c22dba1 - -=head1 PALETTE REPLACEMENT - -You can specify a palette to replace the palette of the image. Do this by adding up to six sets of seven hex lowercase numbers prefixed with C

to the end of the URL. - -Example: - - http://192.168.0.1/palimg/logo.gif/p01234567890abcfffffffcccccccddddddd - -=head1 BUGS AND LIMITATIONS - -There are no known bugs in this module. - -Please report problems to the Perlbal mailing list, http://groups.google.com/group/perlbal - -Patches are welcome. - -=head1 AUTHORS - -Brad Fitzpatrick -Mark Smith - -=head1 LICENSE AND COPYRIGHT - -Artistic/GPLv2, at your choosing. - -Copyright 2004, Danga Interactive -Copyright 2005-2007, Six Apart Ltd diff --git a/lib/mogdeps/Perlbal/Plugin/Queues.pm b/lib/mogdeps/Perlbal/Plugin/Queues.pm deleted file mode 100644 index 027110d3..00000000 --- a/lib/mogdeps/Perlbal/Plugin/Queues.pm +++ /dev/null @@ -1,55 +0,0 @@ -########################################################################### -# simple queue length header inclusion plugin -########################################################################### - -package Perlbal::Plugin::Queues; - -use strict; -use warnings; -no warnings qw(deprecated); - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - - # more complicated statistics - $svc->register_hook('Queues', 'backend_client_assigned', sub { - my Perlbal::BackendHTTP $obj = shift; - my Perlbal::HTTPHeaders $hds = $obj->{req_headers}; - my Perlbal::Service $svc = $obj->{service}; - return 0 unless defined $hds && defined $svc; - - # determine age of oldest (first in line) - my $now = time; - my Perlbal::ClientProxy $cp = $svc->{waiting_clients}->[0]; - my $age = defined $cp ? ($now - $cp->{last_request_time}) : 0; - - # now do the age of the high priority queue - $cp = $svc->{waiting_clients_highpri}->[0]; - my $hpage = defined $cp ? 
($now - $cp->{last_request_time}) : 0; - - # setup the queue length headers - $hds->header('X-Queue-Count', scalar(@{$svc->{waiting_clients}})); - $hds->header('X-Queue-Age', $age); - $hds->header('X-HP-Queue-Count', scalar(@{$svc->{waiting_clients_highpri}})); - $hds->header('X-HP-Queue-Age', $hpage); - return 0; - }); - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - - # clean up time - $svc->unregister_hooks('Queues'); - return 1; -} - -# we don't do anything in here -sub load { return 1; } -sub unload { return 1; } - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/Redirect.pm b/lib/mogdeps/Perlbal/Plugin/Redirect.pm deleted file mode 100644 index 19cdb1d3..00000000 --- a/lib/mogdeps/Perlbal/Plugin/Redirect.pm +++ /dev/null @@ -1,130 +0,0 @@ -package Perlbal::Plugin::Redirect; -use strict; -use warnings; - -sub handle_request { - my ($svc, $pb) = @_; - - my $mappings = $svc->{extra_config}{_redirect_host}; - my $req_header = $pb->{req_headers}; - - # returns 1 if done with client, 0 if no action taken - my $map_using = sub { - my ($match_on) = @_; - - my $target_host = $mappings->{$match_on}; - - return 0 unless $target_host; - - my $path = $req_header->request_uri; - - my $res_header = Perlbal::HTTPHeaders->new_response(301); - $res_header->header('Location' => "http://$target_host$path"); - $res_header->header('Content-Length' => 0); - # For some reason a follow-up request gets a "400 Bad request" response, - # so until someone has time to figure out why, just punt and disable - # keep-alives after this request. - $res_header->header('Connection' => 'close'); - $pb->write($res_header->to_string_ref()); - - return 1; - }; - - # The following is lifted wholesale from the vhosts plugin. - # FIXME: Factor it out to a utility function, I guess? 
- # - # foo.site.com should match: - # foo.site.com - # *.foo.site.com - # *.site.com - # *.com - # * - - my $vhost = lc($req_header->header("Host")); - - # if no vhost, just try the * mapping - return $map_using->("*") unless $vhost; - - # Strip off the :portnumber, if any - $vhost =~ s/:\d+$//; - - # try the literal mapping - return 1 if $map_using->($vhost); - - # and now try wildcard mappings, removing one part of the domain - # at a time until we find something, or end up at "*" - - # first wildcard, prepending the "*." - my $wild = "*.$vhost"; - return 1 if $map_using->($wild); - - # now peel away subdomains - while ($wild =~ s/^\*\.[\w\-\_]+/*/) { - return 1 if $map_using->($wild); - } - - # last option: use the "*" wildcard - return $map_using->("*"); -} - -sub register { - my ($class, $svc) = @_; - - $svc->register_hook('Redirect', 'start_http_request', sub { handle_request($svc, $_[0]); }); -} - -sub unregister { - my ($class, $svc) = @_; - $svc->unregister_hooks('Redirect'); -} - -sub handle_redirect_command { - my $mc = shift->parse(qr/^redirect\s+host\s+(\S+)\s+(\S+)$/, "usage: REDIRECT HOST "); - my ($match_host, $target_host) = $mc->args; - - my $svcname; - unless ($svcname ||= $mc->{ctx}{last_created}) { - return $mc->err("No service name in context from CREATE SERVICE or USE "); - } - - my $svc = Perlbal->service($svcname); - return $mc->err("Non-existent service '$svcname'") unless $svc; - - $svc->{extra_config}{_redirect_host} ||= {}; - $svc->{extra_config}{_redirect_host}{lc($match_host)} = lc($target_host); - - return 1; -} - -# called when we are loaded -sub load { - Perlbal::register_global_hook('manage_command.redirect', \&handle_redirect_command); - - return 1; -} - -# called for a global unload -sub unload { - return 1; -} - -1; - -=head1 NAME - -Perlbal::Plugin::Redirect - Plugin to do redirecting in Perlbal land - -=head1 SYNOPSIS - - LOAD redirect - - CREATE SERVICE redirector - SET role = web_server - SET plugins = redirect - REDIRECT 
HOST example.com www.example.net - ENABLE redirector - -=head1 LIMITATIONS - -Right now this can only redirect at the hostname level. Also, it just -assumes you want an http: URL. diff --git a/lib/mogdeps/Perlbal/Plugin/Stats.pm b/lib/mogdeps/Perlbal/Plugin/Stats.pm deleted file mode 100644 index e28b8866..00000000 --- a/lib/mogdeps/Perlbal/Plugin/Stats.pm +++ /dev/null @@ -1,167 +0,0 @@ -########################################################################### -# basic Perlbal statistics gatherer -########################################################################### - -package Perlbal::Plugin::Stats; - -use strict; -use warnings; -no warnings qw(deprecated); - -use Time::HiRes qw(gettimeofday tv_interval); - -# setup our package variables -our %statobjs; # { svc_name => [ service, statobj ], svc_name => [ service, statobj ], ... } - -# define all stats keys here -our @statkeys = qw( files_sent files_reproxied - web_requests proxy_requests - proxy_requests_highpri ); - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - - # create a stats object - my $sobj = Perlbal::Plugin::Stats::Storage->new(); - $statobjs{$svc->{name}} = [ $svc, $sobj ]; - - # simple events we count are done here. when the hook on the left side is called, - # we simply increment the count of the stat on the right side. - my %simple = qw( - start_send_file files_sent - start_file_reproxy files_reproxied - start_web_request web_requests - ); - - # create hooks for %simple things - while (my ($hook, $stat) = each %simple) { - eval "\$svc->register_hook('Stats', '$hook', sub { \$sobj->{'$stat'}++; return 0; });"; - return undef if $@; - } - - # more complicated statistics - $svc->register_hook('Stats', 'backend_client_assigned', sub { - my Perlbal::BackendHTTP $be = shift; - my Perlbal::ClientProxy $cp = $be->{client}; - $sobj->{pending}->{"$cp"} = [ gettimeofday() ]; - ($cp->{high_priority} ? 
$sobj->{proxy_requests_highpri} : $sobj->{proxy_requests})++; - return 0; - }); - $svc->register_hook('Stats', 'backend_response_received', sub { - my Perlbal::BackendHTTP $be = shift; - my Perlbal::ClientProxy $obj = $be->{client}; - my $ot = delete $sobj->{pending}->{"$obj"}; - return 0 unless defined $ot; - - # now construct data to put in recent - if (defined $obj->{req_headers}) { - my $uri = 'http://' . ($obj->{req_headers}->header('Host') || 'unknown') . $obj->{req_headers}->request_uri; - push @{$sobj->{recent}}, sprintf('%-6.4f %s', tv_interval($ot), $uri); - shift(@{$sobj->{recent}}) if scalar(@{$sobj->{recent}}) > 100; # if > 100 items, lose one - } - return 0; - }); - - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - - # clean up time - $svc->unregister_hooks('Stats'); - delete $statobjs{$svc->{name}}; - return 1; -} - -# called when we are loaded -sub load { - # setup a management command to dump statistics - Perlbal::register_global_hook("manage_command.stats", sub { - my @res; - - # create temporary object for stats storage - my $gsobj = Perlbal::Plugin::Stats::Storage->new(); - - # dump per service - foreach my $svc (keys %statobjs) { - my $sobj = $statobjs{$svc}->[1]; - - # for now, simply dump the numbers we have - foreach my $key (sort @statkeys) { - push @res, sprintf("%-15s %-25s %12d", $svc, $key, $sobj->{$key}); - $gsobj->{$key} += $sobj->{$key}; - } - } - - # global stats - foreach my $key (sort @statkeys) { - push @res, sprintf("%-15s %-25s %12d", 'total', $key, $gsobj->{$key}); - } - - push @res, "."; - return \@res; - }); - - # recent requests and how long they took - Perlbal::register_global_hook("manage_command.recent", sub { - my @res; - foreach my $svc (keys %statobjs) { - my $sobj = $statobjs{$svc}->[1]; - push @res, "$svc $_" - foreach @{$sobj->{recent}}; - } - - push @res, "."; - return \@res; - }); - - return 1; -} - -# called for a global unload -sub unload { - # 
unregister our global hooks - Perlbal::unregister_global_hook('manage_command.stats'); - Perlbal::unregister_global_hook('manage_command.recent'); - - # take out all service stuff - foreach my $statref (values %statobjs) { - $statref->[0]->unregister_hooks('Stats'); - } - %statobjs = (); - - return 1; -} - -# statistics storage object -package Perlbal::Plugin::Stats::Storage; - -use fields ( - 'files_sent', # files sent from disk (includes reproxies and regular web requests) - 'files_reproxied', # files we've sent via reproxying (told to by backend) - 'web_requests', # requests we sent ourselves (no reproxy, no backend) - 'proxy_requests', # regular requests that went to a backend to be served - 'proxy_requests_highpri', # same as above, except high priority - - 'pending', # hashref; { "obj" => time_start } - 'recent', # arrayref; strings of recent URIs and times - ); - -sub new { - my Perlbal::Plugin::Stats::Storage $self = shift; - $self = fields::new($self) unless ref $self; - - # 0 initialize everything here - $self->{$_} = 0 foreach @Perlbal::Plugin::Stats::statkeys; - - # other setup - $self->{pending} = {}; - $self->{recent} = []; - - return $self; -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/Vhosts.pm b/lib/mogdeps/Perlbal/Plugin/Vhosts.pm deleted file mode 100644 index d1a36823..00000000 --- a/lib/mogdeps/Perlbal/Plugin/Vhosts.pm +++ /dev/null @@ -1,179 +0,0 @@ -########################################################################### -# plugin to do name-based virtual hosts -########################################################################### - -# things to test: -# one persistent connection, first to a docs plugin, then to web proxy... 
see if it returns us to our base class after end of request -# PUTing a large file to a selector, seeing if it is put correctly to the PUT-enabled web_server proxy -# obvious cases: non-existent domains, default domains (*), proper matching (foo.brad.lj before *.brad.lj) -# - -package Perlbal::Plugin::Vhosts; - -use strict; -use warnings; -no warnings qw(deprecated); - -our %Services; # service_name => $svc - -# when "LOAD" directive loads us up -sub load { - my $class = shift; - - Perlbal::register_global_hook('manage_command.vhost', sub { - my $mc = shift->parse(qr/^vhost\s+(?:(\w+)\s+)?(\S+)\s*=\s*(\w+)$/, - "usage: VHOST [] = "); - my ($selname, $host, $target) = $mc->args; - unless ($selname ||= $mc->{ctx}{last_created}) { - return $mc->err("omitted service name not implied from context"); - } - - my $ss = Perlbal->service($selname); - return $mc->err("Service '$selname' is not a selector service") - unless $ss && $ss->{role} eq "selector"; - - $host = lc $host; - return $mc->err("invalid host pattern: '$host'") - unless $host =~ /^[\w\-\_\.\*\;\:]+$/; - - $ss->{extra_config}->{_vhosts} ||= {}; - $ss->{extra_config}->{_vhosts}{$host} = $target; - - return $mc->ok; - }); - return 1; -} - -# unload our global commands, clear our service object -sub unload { - my $class = shift; - - Perlbal::unregister_global_hook('manage_command.vhost'); - unregister($class, $_) foreach (values %Services); - return 1; -} - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - unless ($svc && $svc->{role} eq "selector") { - die "You can't load the vhost plugin on a service not of role selector.\n"; - } - - $svc->selector(\&vhost_selector); - $svc->{extra_config}->{_vhosts} = {}; - - $Services{"$svc"} = $svc; - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - $svc->selector(undef); - delete $Services{"$svc"}; - return 1; -} - -sub dumpconfig { - my ($class, $svc) = @_; - - my $vhosts 
= $svc->{extra_config}->{_vhosts}; - - return unless $vhosts; - - my @return; - - while (my ($vhost, $target) = each %$vhosts) { - push @return, "VHOST $vhost = $target"; - } - - return @return; -} - -# call back from Service via ClientHTTPBase's event_read calling service->select_new_service(Perlbal::ClientHTTPBase) -sub vhost_selector { - my Perlbal::ClientHTTPBase $cb = shift; - - my $req = $cb->{req_headers}; - return $cb->_simple_response(404, "Not Found (no reqheaders)") unless $req; - - my $vhost = $req->header("Host"); - - # Browsers and the Apache API considers 'www.example.com.' == 'www.example.com' - $vhost and $vhost =~ s/\.$//; - - my $uri = $req->request_uri; - my $maps = $cb->{service}{extra_config}{_vhosts} ||= {}; - - # ability to ask for one host, but actually use another. (for - # circumventing javascript/java/browser host restrictions when you - # actually control two domains). - if ($vhost && $uri =~ m!^/__using/([\w\.]+)(?:/\w+)(?:\?.*)?$!) { - my $alt_host = $1; - - # update our request object's Host header, if we ended up switching them - # around with /__using/... - my $svc_name = $maps->{"$vhost;using:$alt_host"}; - my $svc = $svc_name ? Perlbal->service($svc_name) : undef; - unless ($svc) { - $cb->_simple_response(404, "Vhost twiddling not configured for requested pair."); - return 1; - } - - $req->header("Host", $alt_host); - $svc->adopt_base_client($cb); - return 1; - } - - # returns 1 if done with client, 0 if no action taken - my $map_using = sub { - my ($match_on, $force) = @_; - - my $map_name = $maps->{$match_on}; - my $svc = $map_name ? Perlbal->service($map_name) : undef; - - return 0 unless $svc || $force; - - unless ($svc) { - $cb->_simple_response(404, "Not Found (no configured vhost)"); - return 1; - } - - $svc->adopt_base_client($cb); - return 1; - }; - - # foo.site.com should match: - # foo.site.com - # *.foo.site.com -- this one's questionable, but might as well? 
- # *.site.com - # *.com - # * - - # if no vhost, just try the * mapping - return $map_using->("*", 1) unless $vhost; - - # Strip off the :portnumber, if any - $vhost =~ s/:\d+$//; - - # try the literal mapping - return if $map_using->($vhost); - - # and now try wildcard mappings, removing one part of the domain - # at a time until we find something, or end up at "*" - - # first wildcard, prepending the "*." - my $wild = "*.$vhost"; - return if $map_using->($wild); - - # now peel away subdomains - while ($wild =~ s/^\*\.[\w\-\_]+/*/) { - return if $map_using->($wild); - } - - # last option: use the "*" wildcard - return $map_using->("*", 1); -} - -1; diff --git a/lib/mogdeps/Perlbal/Plugin/Vpaths.pm b/lib/mogdeps/Perlbal/Plugin/Vpaths.pm deleted file mode 100644 index 4108597d..00000000 --- a/lib/mogdeps/Perlbal/Plugin/Vpaths.pm +++ /dev/null @@ -1,105 +0,0 @@ -########################################################################### -# plugin to use with selectors to select by path -# -# this will not play well with the Vhosts plugin or any other selector -# behavior plugins. -# -# this has also not been optimized for huge volume sites. 
-########################################################################### - -package Perlbal::Plugin::Vpaths; - -use strict; -use warnings; -no warnings qw(deprecated); - -our %Services; # service_name => $svc - -# when "LOAD" directive loads us up -sub load { - my $class = shift; - - Perlbal::register_global_hook('manage_command.vpath', sub { - my $mc = shift->parse(qr/^vpath\s+(?:(\w+)\s+)?(\S+)\s*=\s*(\w+)$/, - "usage: VPATH [] = "); - my ($selname, $regex, $target) = $mc->args; - unless ($selname ||= $mc->{ctx}{last_created}) { - return $mc->err("omitted service name not implied from context"); - } - - my $ss = Perlbal->service($selname); - return $mc->err("Service '$selname' is not a selector service") - unless $ss && $ss->{role} eq "selector"; - - my $cregex = qr/$regex/; - return $mc->err("invalid regular expression: '$regex'") - unless $cregex; - - $ss->{extra_config}->{_vpaths} ||= []; - push @{$ss->{extra_config}->{_vpaths}}, [ $cregex, $target ]; - - return $mc->ok; - }); - return 1; -} - -# unload our global commands, clear our service object -sub unload { - my $class = shift; - - Perlbal::unregister_global_hook('manage_command.vpath'); - unregister($class, $_) foreach (values %Services); - return 1; -} - -# called when we're being added to a service -sub register { - my ($class, $svc) = @_; - unless ($svc && $svc->{role} eq "selector") { - die "You can't load the vpath plugin on a service not of role selector.\n"; - } - - $svc->selector(\&vpath_selector); - $svc->{extra_config}->{_vpaths} = []; - - $Services{"$svc"} = $svc; - return 1; -} - -# called when we're no longer active on a service -sub unregister { - my ($class, $svc) = @_; - $svc->selector(undef); - delete $Services{"$svc"}; - return 1; -} - -# call back from Service via ClientHTTPBase's event_read calling service->select_new_service(Perlbal::ClientHTTPBase) -sub vpath_selector { - my Perlbal::ClientHTTPBase $cb = shift; - - my $req = $cb->{req_headers}; - return 
$cb->_simple_response(404, "Not Found (no reqheaders)") unless $req; - - my $uri = $req->request_uri; - my $maps = $cb->{service}{extra_config}{_vpaths} ||= {}; - - # iterate down the list of paths, find any matches - foreach my $row (@$maps) { - next unless $uri =~ /$row->[0]/; - - my $svc_name = $row->[1]; - my $svc = $svc_name ? Perlbal->service($svc_name) : undef; - unless ($svc) { - $cb->_simple_response(404, "Not Found ($svc_name not a defined service)"); - return 1; - } - - $svc->adopt_base_client($cb); - return 1; - } - - return 0; -} - -1; diff --git a/lib/mogdeps/Perlbal/Pool.pm b/lib/mogdeps/Perlbal/Pool.pm deleted file mode 100644 index a6d88a1e..00000000 --- a/lib/mogdeps/Perlbal/Pool.pm +++ /dev/null @@ -1,301 +0,0 @@ -###################################################################### -# Pool class -###################################################################### -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. -# - -package Perlbal::Pool; -use strict; -use warnings; - -use Perlbal::BackendHTTP; - -# how often to reload the nodefile -use constant NODEFILE_RELOAD_FREQ => 3; - -# balance methods we support (note: sendstats mode is now removed) -use constant BM_ROUNDROBIN => 2; -use constant BM_RANDOM => 3; - -use fields ( - 'name', # string; name of this pool - 'use_count', # int; number of services using us - 'nodes', # arrayref; [ip, port] values (port defaults to 80) - 'node_count', # int; number of nodes - 'node_used', # hashref; { ip:port => use count } - 'balance_method', # int; BM_ constant from above - - # used in nodefile mode - 'nodefile', # string; filename to read nodes from - 'nodefile.lastmod', # unix time nodefile was last modified - 'nodefile.lastcheck', # unix time nodefile was last stated - 'nodefile.checking', # boolean; if true AIO is stating the file for us - ); - -sub new { - my Perlbal::Pool $self = shift; - $self = fields::new($self) unless ref $self; - - my ($name) = @_; - - 
$self->{name} = $name; - $self->{use_count} = 0; - - $self->{nodes} = []; - $self->{node_count} = 0; - $self->{node_used} = {}; - - $self->{nodefile} = undef; - $self->{balance_method} = BM_RANDOM; - - return $self; -} - -sub set { - my Perlbal::Pool $self = shift; - - my ($key, $val, $mc) = @_; - my $set = sub { $self->{$key} = $val; return $mc->ok; }; - - if ($key eq 'nodefile') { - # allow to unset it, which stops us from checking it further, - # but doesn't clear our current list of nodes - if ($val =~ /^(?:none|undef|null|""|'')$/) { - $self->{'nodefile'} = undef; - $self->{'nodefile.lastmod'} = 0; - $self->{'nodefile.checking'} = 0; - $self->{'nodefile.lastcheck'} = 0; - return $mc->ok; - } - - # enforce that it exists from here on out - return $mc->err("File not found") - unless -e $val; - - # force a reload - $self->{'nodefile'} = $val; - $self->{'nodefile.lastmod'} = 0; - $self->{'nodefile.checking'} = 0; - $self->load_nodefile; - $self->{'nodefile.lastcheck'} = time; - return $mc->ok; - } - - if ($key eq "balance_method") { - $val = { - 'random' => BM_RANDOM, - }->{$val}; - return $mc->err("Unknown balance method") - unless $val; - return $set->(); - } - -} - -sub dumpconfig { - my Perlbal::Pool $self = shift; - my $name = $self->{name}; - - my @return; - - if (my $nodefile = $self->{'nodefile'}) { - push @return, "SET nodefile = $nodefile"; - } else { - foreach my $node (@{$self->{nodes}}) { - my ($ip, $port) = @$node; - push @return, "POOL ADD $name $ip:$port"; - } - } - return @return; -} - -# returns string of balance method -sub balance_method { - my Perlbal::Pool $self = $_[0]; - my $methods = { - &BM_ROUNDROBIN => "round_robin", - &BM_RANDOM => "random", - }; - return $methods->{$self->{balance_method}} || $self->{balance_method}; -} - -sub load_nodefile { - my Perlbal::Pool $self = shift; - return 0 unless $self->{'nodefile'}; - - if ($Perlbal::OPTMOD_LINUX_AIO) { - return $self->_load_nodefile_async; - } else { - return 
$self->_load_nodefile_sync; - } -} - -sub _parse_nodefile { - my Perlbal::Pool $self = shift; - my $dataref = shift; - - my @nodes = split(/\r?\n/, $$dataref); - - # prepare for adding nodes - $self->{nodes} = []; - $self->{node_used} = {}; - - foreach (@nodes) { - s/\#.*//; - if (/(\d+\.\d+\.\d+\.\d+)(?::(\d+))?/) { - my ($ip, $port) = ($1, $2); - $port ||= 80; - $self->{node_used}->{"$ip:$port"} ||= 0; # set to 0 if not set - push @{$self->{nodes}}, [ $ip, $port ]; - } - } - - # setup things using new data - $self->{node_count} = scalar @{$self->{nodes}}; -} - -sub _load_nodefile_sync { - my Perlbal::Pool $self = shift; - - my $mod = (stat($self->{nodefile}))[9]; - return if $mod == $self->{'nodefile.lastmod'}; - $self->{'nodefile.lastmod'} = $mod; - - open NODEFILE, $self->{nodefile} or return; - my $nodes; - { local $/ = undef; $nodes = ; } - close NODEFILE; - $self->_parse_nodefile(\$nodes); -} - -sub _load_nodefile_async { - my Perlbal::Pool $self = shift; - - return if $self->{'nodefile.checking'}; - $self->{'nodefile.checking'} = 1; - - Perlbal::AIO::aio_stat($self->{nodefile}, sub { - $self->{'nodefile.checking'} = 0; - - # this might have gotten unset while we were out statting the file, which - # means that the user has instructed us not to use a node file, and may - # have changed the nodes in the pool, so we should do nothing and return - return unless $self->{'nodefile'}; - - # ignore if the file doesn't exist - return unless -e _; - - my $mod = (stat(_))[9]; - return if $mod == $self->{'nodefile.lastmod'}; - $self->{'nodefile.lastmod'} = $mod; - - # construct a filehandle (we only have a descriptor here) - open NODEFILE, $self->{nodefile} - or return; - my $nodes; - { local $/ = undef; $nodes = ; } - close NODEFILE; - - $self->_parse_nodefile(\$nodes); - return; - }); - - return 1; -} - -sub add { - my Perlbal::Pool $self = shift; - my ($ip, $port) = @_; - - $self->remove($ip, $port); # no dupes - - $self->{node_used}->{"$ip:$port"} = 0; - push 
@{$self->{nodes}}, [ $ip, $port ]; - $self->{node_count} = scalar(@{$self->{nodes}}); -} - -sub remove { - my Perlbal::Pool $self = shift; - my ($ip, $port) = @_; - - delete $self->{node_used}->{"$ip:$port"}; - @{$self->{nodes}} = grep { "$_->[0]:$_->[1]" ne "$ip:$port" } @{$self->{nodes}}; - $self->{node_count} = scalar(@{$self->{nodes}}); -} - -sub get_backend_endpoint { - my Perlbal::Pool $self = $_[0]; - - my @endpoint; # (IP,port) - - # re-load nodefile if necessary - if ($self->{nodefile}) { - my $now = time; - if ($now > $self->{'nodefile.lastcheck'} + NODEFILE_RELOAD_FREQ) { - $self->{'nodefile.lastcheck'} = $now; - $self->load_nodefile; - } - } - - # no nodes? - return () unless $self->{node_count}; - - # pick one randomly - return @{$self->{nodes}[int(rand($self->{node_count}))]}; -} - -sub backend_should_live { - my Perlbal::Pool $self = $_[0]; - my Perlbal::BackendHTTP $be = $_[1]; - - # a backend stays alive if we still have users. eventually this whole - # function might do more and actually take into account the individual - # backend, but for now, this suits us. 
- return 1 if $self->{use_count}; - return 0; -} - -sub node_count { - my Perlbal::Pool $self = $_[0]; - return $self->{node_count}; -} - -sub nodes { - my Perlbal::Pool $self = $_[0]; - return $self->{nodes}; -} - -sub node_used { - my Perlbal::Pool $self = $_[0]; - return $self->{node_used}->{$_[1]}; -} - -sub mark_node_used { - my Perlbal::Pool $self = $_[0]; - $self->{node_used}->{$_[1]}++; -} - -sub increment_use_count { - my Perlbal::Pool $self = $_[0]; - $self->{use_count}++; -} - -sub decrement_use_count { - my Perlbal::Pool $self = $_[0]; - $self->{use_count}--; -} - -sub name { - my Perlbal::Pool $self = $_[0]; - return $self->{name}; -} - -1; - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/ReproxyManager.pm b/lib/mogdeps/Perlbal/ReproxyManager.pm deleted file mode 100644 index 58c45aab..00000000 --- a/lib/mogdeps/Perlbal/ReproxyManager.pm +++ /dev/null @@ -1,255 +0,0 @@ -# HTTP connection to non-pool backend nodes (probably fast event-based webservers) -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. -# - -package Perlbal::ReproxyManager; -use strict; -use warnings; -no warnings qw(deprecated); - -# class storage to store 'host:ip' => $service objects, for making -# reproxies use a service that you can then track -our $ReproxySelf; -our %ReproxyConnecting; # ( host:ip => $backend ); keeps track of outstanding connections to backend that - # are in the connecting state -our %ReproxyBored; # ( host:ip => [ $backend, ... ] ); list of our bored backends -our %ReproxyQueues; # ( host:ip => [ $clientproxy, ... ] ); queued up requests for this backend -our %ReproxyBackends; # ( host:ip => [ $backend, ... 
] ); array of backends we have connected -our %ReproxyMax; # ( host:ip => int ); maximum number of connections to have open at any one time -our $ReproxyGlobalMax; # int; the global cap used if no per-host cap is specified -our $NoSpawn = 0; # bool; when set, spawn_backend immediately returns without running -our $LastCleanup = 0; # int; time we last ran our cleanup logic (FIXME: temp hack) - -Perlbal::track_var("rep_connecting", \%ReproxyConnecting); -Perlbal::track_var("rep_bored", \%ReproxyBored); -Perlbal::track_var("rep_queues", \%ReproxyQueues); -Perlbal::track_var("rep_backends", \%ReproxyBackends); - -# singleton new function; returns us if we exist, else creates us -sub get { - return $ReproxySelf if $ReproxySelf; - - # doesn't exist, so create it and return it - my $class = shift; - my $self = {}; - bless $self, $class; - return $ReproxySelf = $self; -} - -# given (clientproxy, primary_res_hdrs), initiate proceedings to process a -# request for a reproxy resource -sub do_reproxy { - my Perlbal::ReproxyManager $self = Perlbal::ReproxyManager->get; # singleton - my Perlbal::ClientProxy $cp = $_[0]; - return undef unless $self && $cp; - - # get data we use - my $datref = $cp->{reproxy_uris}->[0]; - my $ipport = "$datref->[0]:$datref->[1]"; - push @{$ReproxyQueues{$ipport} ||= []}, $cp; - - # see if we should do cleanup (FIXME: temp hack) - my $now = time(); - if ($LastCleanup < $now - 5) { - # remove closed backends from our array. this is O(n) but n is small - # and we're paranoid that just keeping a count would get corrupt over - # time. also removes the backends that have clients that are closed. - @{$ReproxyBackends{$ipport}} = grep { - ! $_->{closed} && (! $_->{client} || ! 
$_->{client}->{closed}) - } @{$ReproxyBackends{$ipport}}; - - $LastCleanup = $now; - } - - # now start a new backend - $self->spawn_backend($ipport); - return 1; -} - -# part of the reportto interface; this is called when a backend is unable to establish -# a connection with a backend. we simply try the next uri. -sub note_bad_backend_connect { - my Perlbal::ReproxyManager $self = $_[0]; - my Perlbal::BackendHTTP $be = $_[1]; - - # decrement counts and undef connecting backend - $ReproxyConnecting{$be->{ipport}} = undef; - - # if nobody waiting, doesn't matter if we couldn't get to this backend - return unless @{$ReproxyQueues{$be->{ipport}} || []}; - - # if we still have some connected backends then ignore this bad connection attempt - return if scalar @{$ReproxyBackends{$be->{ipport}} || []}; - - # at this point, we have no connected backends, and our connecting one failed - # so we want to tell all of the waiting clients to try their next uri, because - # this host is down. - while (my Perlbal::ClientProxy $cp = shift @{$ReproxyQueues{$be->{ipport}}}) { - $cp->try_next_uri; - } - return 1; -} - -# called by a backend when it's ready for a request -sub register_boredom { - my Perlbal::ReproxyManager $self = $_[0]; - my Perlbal::BackendHTTP $be = $_[1]; - - # if this backend was connecting - my $ipport = $be->{ipport}; - if ($ReproxyConnecting{$ipport} && $ReproxyConnecting{$ipport} == $be) { - $ReproxyConnecting{$ipport} = undef; - $ReproxyBackends{$ipport} ||= []; - push @{$ReproxyBackends{$ipport}}, $be; - } - - # sometimes a backend is closed but it tries to register with us anyway... 
ignore it - # but since this might have been our only one, spawn another - if ($be->{closed}) { - $self->spawn_backend($ipport); - return; - } - - # find some clients to use - while (my Perlbal::ClientProxy $cp = shift @{$ReproxyQueues{$ipport} || []}) { - # safety checks - next if $cp->{closed}; - - # give backend to client - $cp->use_reproxy_backend($be); - return; - } - - # no clients if we get here, so push onto bored backend list - push @{$ReproxyBored{$ipport} ||= []}, $be; - - # clean up the front of our list if we can (see docs above) - if (my Perlbal::BackendHTTP $bbe = $ReproxyBored{$ipport}->[0]) { - if ($bbe->{alive_time} < time() - 5) { - $NoSpawn = 1; - $bbe->close('have_newer_bored'); - shift @{$ReproxyBored{$ipport}}; - $NoSpawn = 0; - } - } - return 0; -} - -# backend closed, decrease counts, etc -sub note_backend_close { - my Perlbal::ReproxyManager $self = $_[0]; - my Perlbal::BackendHTTP $be = $_[1]; - - # remove closed backends from our array. this is O(n) but n is small - # and we're paranoid that just keeping a count would get corrupt over - # time. - @{$ReproxyBackends{$be->{ipport}}} = grep { - ! $_->{closed} - } @{$ReproxyBackends{$be->{ipport}}}; - - # spawn more if needed - $self->spawn_backend($be->{ipport}); -} - -sub spawn_backend { - return if $NoSpawn; - - my Perlbal::ReproxyManager $self = $_[0]; - my $ipport = $_[1]; - - # if we're already connecting, we don't want to spawn another one - if (my Perlbal::BackendHTTP $be = $ReproxyConnecting{$ipport}) { - # see if this one is too old? - if ($be->{create_time} < (time() - 5)) { # older than 5 seconds? 
- $self->note_bad_backend_connect($be); - $be->close("connection_timeout"); - - # we return here instead of spawning because closing the backend calls - # note_backend_close which will call spawn_backend again, and at that - # point we won't have a pending connection and can spawn - return; - } else { - # don't spawn more if we're already connecting - return; - } - } - - # if nobody waiting, don't spawn extra connections - return unless @{$ReproxyQueues{$ipport} || []}; - - # don't spawn if we have a bored one already - while (my Perlbal::BackendHTTP $bbe = pop @{$ReproxyBored{$ipport} || []}) { - - # don't use keep-alive connections if we know the server's - # just about to kill the connection for being idle - my $now = time(); - if ($bbe->{disconnect_at} && $now + 2 > $bbe->{disconnect_at} || - $bbe->{alive_time} < $now - 5) - { - $NoSpawn = 1; - $bbe->close("too_close_disconnect"); - $NoSpawn = 0; - next; - } - - # it's good, give it to someone - $self->register_boredom($bbe); - return; - } - - # see if we have too many already? 
- my $max = $ReproxyMax{$ipport} || $ReproxyGlobalMax || 0; - my $count = scalar @{$ReproxyBackends{$ipport} || []}; - return if $max && ($count >= $max); - - # start one connecting and enqueue - my $be = Perlbal::BackendHTTP->new(undef, split(/:/, $ipport), { reportto => $self }) - or return 0; - $ReproxyConnecting{$ipport} = $be; -} - -sub backend_response_received { - my Perlbal::ReproxyManager $self = $_[0]; - my Perlbal::BackendHTTP $be = $_[1]; - my Perlbal::ClientProxy $cp = $be->{client}; - - # if no client, close backend and return 1 - unless ($cp) { - $be->close("lost_client"); - return 1; - } - - # pass on to client - return $cp->backend_response_received($be); -} - -sub dump_state { - my $out = shift; - return unless $out; - - # spits out what we have connecting - while (my ($hostip, $dat) = each %ReproxyConnecting) { - $out->("connecting $hostip 1") if defined $dat; - } - while (my ($hostip, $dat) = each %ReproxyBored) { - $out->("bored $hostip " . scalar(@$dat)); - } - while (my ($hostip, $dat) = each %ReproxyQueues) { - $out->("clients_queued $hostip " . scalar(@$dat)); - } - while (my ($hostip, $dat) = each %ReproxyBackends) { - $out->("backends $hostip " . scalar(@$dat)); - foreach my $be (@$dat) { - $out->("... " . $be->as_string); - } - } - while (my ($hostip, $dat) = each %ReproxyMax) { - $out->("SERVER max_reproxy_connections($hostip) = $dat"); - } - $out->("SERVER max_reproxy_connections = " . ($ReproxyGlobalMax || 0)); - $out->('.'); -} - -1; diff --git a/lib/mogdeps/Perlbal/Service.pm b/lib/mogdeps/Perlbal/Service.pm deleted file mode 100644 index 07d093ba..00000000 --- a/lib/mogdeps/Perlbal/Service.pm +++ /dev/null @@ -1,1686 +0,0 @@ -###################################################################### -# Service class -###################################################################### -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. 
-# - -package Perlbal::Service; -use strict; -use warnings; -no warnings qw(deprecated); - -use Perlbal::BackendHTTP; -use Perlbal::Cache; -use Perlbal::Util; - -use fields ( - 'name', # scalar: name of this service - 'role', # scalar: role type 'web_server', 'reverse_proxy', etc... - 'enabled', # scalar: bool, whether we're enabled or not (enabled = listening) - - 'pool', # Perlbal::Pool that we're using to allocate nodes if we're in proxy mode - 'listener', # Perlbal::TCPListener object, when enabled - 'reproxy_cache', # Perlbal::Cache object, when enabled - - # end-user tunables - 'listen', # scalar IP:port of where we're listening for new connections - 'docroot', # document root for webserver role - 'dirindexing', # bool: directory indexing? (for webserver role) not async. - 'index_files', # arrayref of filenames to try for index files - 'enable_concatenate_get', # bool: if user can request concatenated files - 'enable_put', # bool: whether PUT is supported - 'max_put_size', # int: max size in bytes of a put file - 'max_chunked_request_size', # int: max size in bytes of a chunked request (to be written to disk first) - 'min_put_directory', # int: number of directories required to exist at beginning of URIs in put - 'enable_delete', # bool: whether DELETE is supported - 'high_priority_cookie', # cookie name to check if client can 'cut in line' and get backends faster - 'high_priority_cookie_contents', # aforementioned cookie value must contain this substring - 'backend_persist_cache', # scalar: max number of persistent backends to hold onto while no clients - 'persist_client', # bool: persistent connections for clients - 'persist_backend', # bool: persistent connections for backends - 'verify_backend', # bool: get attention of backend before giving it clients (using OPTIONS) - 'verify_backend_path', # path to check with the OPTIONS request (default *) - 'max_backend_uses', # max requests to send per kept-alive backend (default 0 = unlimited) - 'connect_ahead', # 
scalar: number of spare backends to connect to in advance all the time - 'buffer_size', # int: specifies how much data a ClientProxy object should buffer from a backend - 'buffer_size_reproxy_url', # int: same as above but for backends that are reproxying for us - 'queue_relief_size', # int; number of outstanding standard priority - # connections to activate pressure relief at - 'queue_relief_chance', # int:0-100; % chance to take a standard priority - # request when we're in pressure relief mode - 'trusted_upstream_proxies', # Net::Netmask object containing netmasks for trusted upstreams - 'always_trusted', # bool; if true, always trust upstreams - 'blind_proxy', # bool: if true, do not modify X-Forwarded-For, X-Host, or X-Forwarded-Host headers - 'enable_reproxy', # bool; if true, advertise that server will reproxy files and/or URLs - 'reproxy_cache_maxsize', # int; maximum number of reproxy results to be cached. (0 is disabled and default) - 'client_sndbuf_size', # int: bytes for SO_SNDBUF - 'server_process' , # scalar: path to server process (executable) - 'persist_client_timeout', # int: keep-alive timeout in seconds for clients (default is 30) - - # Internal state: - 'waiting_clients', # arrayref of clients waiting for backendhttp conns - 'waiting_clients_highpri', # arrayref of high-priority clients waiting for backendhttp conns - 'waiting_clients_lowpri', # arrayref of low-priority clients waiting for backendhttp conns - 'waiting_client_count', # number of clients waiting for backends - 'waiting_client_map' , # map of clientproxy fd -> 1 (if they're waiting for a conn) - 'pending_connects', # hashref of "ip:port" -> $time (only one pending connect to backend at a time) - 'pending_connect_count', # number of outstanding backend connects - 'bored_backends', # arrayref of backends we've already connected to, but haven't got clients - 'hooks', # hashref: hookname => [ [ plugin, ref ], [ plugin, ref ], ... 
] - 'plugins', # hashref: name => 1 - 'plugin_order', # arrayref: name, name, name... - 'plugin_setters', # hashref: { plugin_name => { key_name => coderef } } - 'extra_config', # hashref: extra config options; name => values - 'spawn_lock', # bool: if true, we're currently in spawn_backends - 'extra_headers', # { insert => [ [ header, value ], ... ], remove => [ header, header, ... ], - # set => [ [ header, value ], ... ] }; used in header management interface - 'generation', # int; generation count so we can slough off backends from old pools - 'backend_no_spawn', # { "ip:port" => 1 }; if on, spawn_backends will ignore this ip:port combo - 'buffer_backend_connect', # 0 for of, else, number of bytes to buffer before we ask for a backend - 'selector', # CODE ref, or undef, for role 'selector' services - 'default_service', # Perlbal::Service; name of a service a selector should default to - 'buffer_uploads', # bool; enable/disable the buffered uploads to disk system - 'buffer_uploads_path', # string; path to store buffered upload files - 'buffer_upload_threshold_time', # int; buffer uploads estimated to take longer than this - 'buffer_upload_threshold_size', # int; buffer uploads greater than this size (in bytes) - 'buffer_upload_threshold_rate', # int; buffer uploads uploading at less than this rate (in bytes/sec) - - 'upload_status_listeners', # string: comma separated list of ip:port of UDP upload status receivers - 'upload_status_listeners_sockaddr', # arrayref of sockaddrs (packed ip/port) - - 'enable_ssl', # bool: whether this service speaks SSL to the client - 'ssl_key_file', # file: path to key pem file - 'ssl_cert_file', # file: path to key pem file - 'ssl_cipher_list', # OpenSSL cipher list string - - 'enable_error_retries', # bool: whether we should retry requests after errors - 'error_retry_schedule', # string of comma-separated seconds (full or partial) to delay between retries - 'latency', # int: milliseconds of latency to add to request - - # stats: - 
'_stat_requests', # total requests to this service - '_stat_cache_hits', # total requests to this service that were served via the reproxy-url cache - ); - -# hash; 'role' => coderef to instantiate a client for this role -our %PluginRoles; - -our $tunables = { - - 'role' => { - des => "What type of service. One of 'reverse_proxy' for a service that load balances to a pool of backend webserver nodes, 'web_server' for a typical webserver', 'management' for a Perlbal management interface (speaks both command-line or HTTP, auto-detected), or 'selector', for a virtual service that maps onto other services.", - required => 1, - - check_type => sub { - my ($self, $val, $errref) = @_; - return 0 unless $val; - return 1 if $val =~ /^(?:reverse_proxy|web_server|management|selector|upload_tracker)$/; - return 1 if $PluginRoles{$val}; - $$errref = "Role not valid for service $self->{name}"; - return 0; - }, - check_role => '*', - setter => sub { - my ($self, $val, $set, $mc) = @_; - my $rv = $set->(); - $self->init; # now that service role is set - return $rv; - }, - }, - - 'listen' => { - check_role => "*", - des => "The ip:port to listen on. For a service to work, you must either make it listen, or make another selector service map to a non-listening service.", - check_type => ["regexp", qr/^(\d+\.\d+\.\d+\.\d+:)?\d+$/, - "Listen argument must be ip:port or port. " . - "e.g. 
192.168.0.1:80 or 81"], - setter => sub { - my ($self, $val, $set, $mc) = @_; - - # close/reopen listening socket - if ($val ne ($self->{listen} || "") && $self->{enabled}) { - $self->disable(undef, "force"); - $self->{listen} = $val; - $self->enable(undef); - } - - return $set->(); - }, - }, - - 'backend_persist_cache' => { - des => "The number of backend connections to keep alive on reserve while there are no clients.", - check_type => "int", - default => 2, - check_role => "reverse_proxy", - }, - - 'persist_client' => { - des => "Whether to enable HTTP keep-alives to the end user.", - default => 0, - check_type => "bool", - check_role => "*", - }, - - 'persist_backend' => { - des => "Whether to enable HTTP keep-alives to the backend webnodes. (Off by default, but highly recommended if Perlbal will be the only client to your backends. If not, beware that Perlbal will hog the connections, starving other clients.)", - default => 0, - check_type => "bool", - check_role => "reverse_proxy", - }, - - 'verify_backend' => { - des => "Whether Perlbal should send a quick OPTIONS request to the backends before sending an actual client request to them. If your backend is Apache or some other process-based webserver, this is HIGHLY recommended. All too often a loaded backend box will reply to new TCP connections, but it's the kernel's TCP stack Perlbal is talking to, not an actual Apache process yet. Using this option reduces end-user latency a ton on loaded sites.", - default => 0, - check_type => "bool", - check_role => "reverse_proxy", - }, - - 'verify_backend_path' => { - des => "What path the OPTIONS request sent by verify_backend should use. Default is '*'.", - default => '*', - check_role => "reverse_proxy", - }, - - 'max_backend_uses' => { - check_role => "reverse_proxy", - des => "The max number of requests to be made on a single persistent backend connection before releasing the connection. 
The default value of 0 means no limit, and the connection will only be discarded once the backend asks it to be, or when Perlbal is sufficiently idle.", - default => 0, - }, - - 'max_put_size' => { - default => 0, # no limit - des => "The maximum content-length that will be accepted for a PUT request, if enable_put is on. Default value of 0 means no limit.", - check_type => "size", - check_role => "web_server", - }, - - 'max_chunked_request_size' => { - default => 209715200, # 200 MB. (0: no limit) - des => "The maximum size that will be accepted for a chunked request. Default is 200MB (which is written to disk, buffered uploads must be on). A value of 0 means no limit.", - check_type => "size", - check_role => "*", - }, - - 'buffer_size' => { - des => "How much we'll ahead of a client we'll get while copying from a backend to a client. If a client gets behind this much, we stop reading from the backend for a bit.", - default => "256k", - check_type => "size", - check_role => "reverse_proxy", - }, - - 'buffer_size_reproxy_url' => { - des => "How much we'll get ahead of a client we'll get while copying from a reproxied URL to a client. If a client gets behind this much, we stop reading from the reproxied URL for a bit. 
The default is lower than the regular buffer_size (50k instead of 256k) because it's assumed that you're only reproxying to large files on event-based webservers, which are less sensitive to many open connections, whereas the 256k buffer size is good for keeping heavy process-based free of slow clients.", - default => "50k", - check_type => "size", - check_role => "reverse_proxy", - }, - - 'queue_relief_size' => { - default => 0, - check_type => "int", - check_role => "reverse_proxy", - }, - - 'queue_relief_chance' => { - default => 0, - check_type => sub { - my ($self, $val, $errref) = @_; - return 1 if $val =~ /^\d+$/ && $val >= 0 && $val <= 100; - $$errref = "Expecting integer value between 0 and 100, inclusive."; - return 0; - }, - check_role => "reverse_proxy", - }, - - 'buffer_backend_connect' => { - des => "How much content-body (POST/PUT/etc) data we read from a client before we start sending it to a backend web node. If 'buffer_uploads' is enabled, this value is used to determine how many bytes are read before Perlbal makes a determination on whether or not to spool the upload to disk.", - default => '100k', - check_type => "size", - check_role => "reverse_proxy", - }, - - 'docroot' => { - des => "Directory root for web server.", - - check_role => "web_server", - val_modify => sub { my $valref = shift; $$valref =~ s!/$!!; }, - check_type => sub { - my ($self, $val, $errref) = @_; - #FIXME: require absolute paths? - return 1 if $val && -d $val; - $$errref = "Directory not found for service $self->{name}"; - return 0; - }, - }, - - 'enable_put' => { - des => "Enable HTTP PUT requests.", - default => 0, - check_role => "web_server", - check_type => "bool", - }, - - 'enable_delete' => { - des => "Enable HTTP DELETE requests.", - default => 0, - check_role => "web_server", - check_type => "bool", - }, - - 'enable_reproxy' => { - des => "Enable 'reproxying' (end-user-transparent internal redirects) to either local files or other URLs. 
When enabled, the backend servers in the pool that this service is configured for will have access to tell this Perlbal instance to serve any local readable file, or connect to any other URL that this Perlbal can connect to. Only enable this if you trust the backend web nodes.", - default => 0, - check_role => "reverse_proxy", - check_type => "bool", - }, - - 'reproxy_cache_maxsize' => { - des => "Set the maximum number of cached reproxy results (X-REPROXY-CACHE-FOR) that may be kept in the service cache. These cached requests take up about 1.25KB of ram each (on Linux x86), but will vary with usage. Perlbal still starts with 0 in the cache and will grow over time. Be careful when adjusting this and watch your ram usage like a hawk.", - default => 0, - check_role => "reverse_proxy", - check_type => "int", - setter => sub { - my ($self, $val, $set, $mc) = @_; - if ($val) { - $self->{reproxy_cache} ||= Perlbal::Cache->new(maxsize => 1); - $self->{reproxy_cache}->set_maxsize($val); - } else { - $self->{reproxy_cache} = undef; - } - return $mc->ok; - }, - }, - - 'upload_status_listeners' => { - des => "Comma separated list of hosts in form 'a.b.c.d:port' which will receive UDP upload status packets no faster than once a second per HTTP request (PUT/POST) from clients that have requested an upload status bar, which they request by appending the URL get argument ?client_up_session=[xxxxxx] where xxxxx is 5-50 'word' characters (a-z, A-Z, 0-9, underscore).", - default => "", - check_role => "reverse_proxy", - check_type => sub { - my ($self, $val, $errref) = @_; - my @packed; - foreach my $ipa (grep { $_ } split(/\s*,\s*/, $val)) { - unless ($ipa =~ /^(\d+\.\d+\.\d+\.\d+):(\d+)$/) { - $$errref = "Invalid UDP endpoint: \"$ipa\". 
Must be of form a.b.c.d:port"; - return 0; - } - push @packed, scalar Socket::sockaddr_in($2, Socket::inet_aton($1)); - } - $self->{upload_status_listeners_sockaddr} = \@packed; - return 1; - }, - }, - - 'min_put_directory' => { - des => "If PUT requests are enabled, require this many levels of directories to already exist. If not, fail.", - default => 0, # no limit - check_role => "web_server", - check_type => "int", - }, - - 'dirindexing' => { - des => "Show directory indexes when an HTTP request is for a directory. Warning: this is not an async operation, so will slow down Perlbal on heavily loaded sites.", - default => 0, - check_role => "web_server", - check_type => "bool", - }, - - 'enable_concatenate_get' => { - des => "Enable Perlbal's multiple-files-in-one-request mode, where a client have use a comma-separated list of files to return, always in text/plain. Useful for web apps which have dozens/hundreds of tiny css/js files, and don't trust browsers/etc to do pipelining. Decreases overall round-trip latency a bunch, but requires app to be modified to support it. See t/17-concat.t test for details.", - default => 0, - check_role => "web_server", - check_type => "bool", - }, - - 'connect_ahead' => { - des => "How many extra backend connections we keep alive in addition to the current ones, in anticipation of new client connections.", - default => 0, - check_type => "int", - check_role => "reverse_proxy", - setter => sub { - my ($self, $val, $set, $mc) = @_; - my $rv = $set->(); - $self->spawn_backends if $self->{enabled}; - return $rv; - }, - }, - - 'always_trusted' => { - des => "Whether to trust all incoming requests' X-Forwarded-For and related headers. 
Set to true only if you know that all incoming requests from your own proxy servers that clean/set those headers.", - default => 0, - check_type => "bool", - check_role => "*", - }, - - 'blind_proxy' => { - des => "Flag to disable any modification of X-Forwarded-For, X-Host, and X-Forwarded-Host headers.", - default => 0, - check_type => "bool", - check_role => "reverse_proxy", - }, - - 'high_priority_cookie' => { - des => "The cookie name to inspect to determine if the client goes onto the high-priority queue.", - check_role => "reverse_proxy", - }, - - 'high_priority_cookie_contents' => { - des => "A string that the high_priority_cookie must contain to go onto the high-priority queue.", - check_role => "reverse_proxy", - }, - - 'trusted_upstream_proxies' => { - des => "A Net::Netmask filter (e.g. 10.0.0.0/24, see Net::Netmask) that determines whether upstream clients are trusted or not, where trusted means their X-Forwarded-For/etc headers are not munged.", - check_role => "*", - check_type => sub { - my ($self, $val, $errref) = @_; - unless (my $loaded = eval { require Net::Netmask; 1; }) { - $$errref = "Net::Netmask not installed"; - return 0; - } - - return 1 if $self->{trusted_upstream_proxies} = Net::Netmask->new2($val); - $$errref = "Error defining trusted upstream proxies: " . Net::Netmask::errstr(); - return 0; - }, - setter => sub { - my ($self, $val, $set, $mc) = @_; - # Do nothing here, we don't want the default setter because we've - # already set the value in the type_check step. 
- return $mc->ok; - }, - }, - - 'index_files' => { - check_role => "web_server", - default => "index.html", - des => "Comma-separated list of filenames to load when a user visits a directory URL, listed in order of preference.", - setter => sub { - my ($self, $val, $set, $mc) = @_; - $self->{index_files} = [ split(/[\s,]+/, $val) ]; - return $mc->ok; - }, - dumper => sub { - my ($self, $val) = @_; - return join(', ', @$val); - }, - }, - - 'default_service' => { - des => "Name of previously-created service to default requests that aren't matched by a selector plugin to.", - check_role => "selector", - check_type => sub { - my ($self, $val, $errref) = @_; - - my $svc = Perlbal->service($val); - unless ($svc) { - $$errref = "Service '$svc' not found"; - return 0; - } - - $self->{default_service} = $svc; - return 1; - }, - setter => sub { - # override default so we don't set it to the text - return $_[3]->ok; - }, - }, - - 'pool' => { - des => "Name of previously-created pool object containing the backend nodes that this reverse proxy sends requests to.", - check_role => "reverse_proxy", - check_type => sub { - my ($self, $val, $errref) = @_; - my $pl = Perlbal->pool($val); - unless ($pl) { - $$errref = "Pool '$val' not found"; - return 0; - } - $self->{pool}->decrement_use_count if $self->{pool}; - $self->{pool} = $pl; - $self->{pool}->increment_use_count; - $self->{generation}++; - return 1; - }, - setter => sub { - my ($self, $val, $set, $mc) = @_; - # override the default, which is to set "pool" to the - # stringified name of the pool, but we already set it in - # the type-checking phase. instead, we do nothing here. - return $mc->ok; - }, - dumper => sub { - my ($self, $val) = @_; - return $val->name; - } - }, - - 'server_process' => { - des => "Executable which will be the HTTP server on stdin/stdout. (ALPHA, EXPERIMENTAL!)", - check_role => "reverse_proxy", - check_type => sub { - my ($self, $val, $errref) = @_; - #FIXME: require absolute paths? 
- return 1 if $val && -x $val; - $$errref = "Server process ($val) not executable."; - return 0; - }, - }, - - 'persist_client_timeout' => { - des => "Timeout in seconds for HTTP keep-alives to the end user (default is 30)", - check_type => "int", - default => 30, - check_role => "*", - }, - - 'buffer_uploads_path' => { - des => "Directory root for storing files used to buffer uploads.", - - check_role => "reverse_proxy", - val_modify => sub { my $valref = shift; $$valref =~ s!/$!!; }, - check_type => sub { - my ($self, $val, $errref) = @_; - #FIXME: require absolute paths? - return 1 if $val && -d $val; - $$errref = "Directory ($val) not found for service $self->{name} (buffer_uploads_path)"; - return 0; - }, - }, - - 'buffer_uploads' => { - des => "Used to enable or disable the buffer uploads to disk system. If enabled, 'buffer_backend_connect' bytes worth of the upload will be stored in memory. At that point, the buffer upload thresholds will be checked to see if we should just send this upload to the backend, or if we should spool it to disk.", - default => 0, - check_role => "reverse_proxy", - check_type => "bool", - }, - - 'buffer_upload_threshold_time' => { - des => "If an upload is estimated to take more than this number of seconds, it will be buffered to disk. Set to 0 to not check estimated time.", - default => 5, - check_role => "reverse_proxy", - check_type => "int", - }, - - 'buffer_upload_threshold_size' => { - des => "If an upload is larger than this size in bytes, it will be buffered to disk. Set to 0 to not check size.", - default => '250k', - check_role => "reverse_proxy", - check_type => "size", - }, - - 'buffer_upload_threshold_rate' => { - des => "If an upload is coming in at a rate less than this value in bytes per second, it will be buffered to disk. 
Set to 0 to not check rate.", - default => 0, - check_role => "reverse_proxy", - check_type => "int", - }, - - 'latency' => { - des => "Forced latency (in milliseconds) to add to request.", - default => 0, - check_role => "selector", - check_type => "int", - }, - - 'enable_ssl' => { - des => "Enable SSL to the client.", - default => 0, - check_type => "bool", - check_role => "*", - }, - - 'ssl_key_file' => { - des => "Path to private key PEM file for SSL.", - default => "certs/server-key.pem", - check_type => "file_or_none", - check_role => "*", - }, - - 'ssl_cert_file' => { - des => "Path to certificate PEM file for SSL.", - default => "certs/server-cert.pem", - check_type => "file_or_none", - check_role => "*", - }, - - 'ssl_cipher_list' => { - des => "OpenSSL-style cipher list.", - default => "ALL:!LOW:!EXP", - check_role => "*", - }, - - 'enable_error_retries' => { - des => 'Whether Perlbal should transparently retry requests to backends if a backend returns a 500 server error.', - default => 0, - check_type => "bool", - check_role => "reverse_proxy", - }, - - 'error_retry_schedule' => { - des => 'String of comma-separated seconds (full or partial) to delay between retries. For example "0,2" would mean do at most two retries, the first zero seconds after the first failure, and the second 2 seconds after the 2nd failure. 
You probably don\'t need to modify the default value', - default => '0,.25,.50,1,1,1,1,1', - check_role => "reverse_proxy", - }, - - 'client_sndbuf_size' => { - des => "How large to set the client's socket SNDBUF.", - default => 0, - check_type => "size", - check_role => '*', - }, - - -}; -sub autodoc_get_tunables { return $tunables; } - -sub new { - my Perlbal::Service $self = shift; - $self = fields::new($self) unless ref $self; - - my ($name) = @_; - - $self->{name} = $name; - $self->{enabled} = 0; - $self->{extra_config} = {}; - - $self->{backend_no_spawn} = {}; - $self->{generation} = 0; - - $self->{hooks} = {}; - $self->{plugins} = {}; - $self->{plugin_order} = []; - - # track pending connects to backend - $self->{pending_connects} = {}; - $self->{pending_connect_count} = 0; - $self->{bored_backends} = []; - - # waiting clients - $self->{waiting_clients} = []; - $self->{waiting_clients_highpri} = []; - $self->{waiting_clients_lowpri} = []; - $self->{waiting_client_count} = 0; - $self->{waiting_client_map} = {}; - - # buffered upload setup - $self->{buffer_uploads_path} = undef; - - # don't have an object for this yet - $self->{trusted_upstream_proxies} = undef; - - # bare data structure for extra header info - $self->{extra_headers} = { remove => [], insert => [] }; - - # things to watch... - foreach my $v (qw(pending_connects bored_backends waiting_clients - waiting_clients_highpri backend_no_spawn - waiting_client_map - )) { - die "Field '$v' not set" unless $self->{$v}; - Perlbal::track_var("svc-$name-$v", $self->{$v}); - } - - return $self; -} - -# handy instance method to run some manage commands in the context of this service, -# without needing to worry about its name. -# This is intended as an internal API thing, so any output that would have been -# generated is just eaten. 
-sub run_manage_commands { - my ($self, $cmd_block) = @_; - - my $ctx = Perlbal::CommandContext->new; - $ctx->{last_created} = $self->name; - return Perlbal::run_manage_commands($cmd_block, undef, $ctx); -} - -# here's an alternative version of the above that runs a single command -sub run_manage_command { - my ($self, $cmd) = @_; - - my $ctx = Perlbal::CommandContext->new; - $ctx->{last_created} = $self->name; - return Perlbal::run_manage_command($cmd, undef, $ctx); -} - -sub dumpconfig { - my $self = shift; - - my @return; - - my %my_tunables = %$tunables; - - my $dump = sub { - my $setting = shift; - }; - - foreach my $skip (qw(role listen pool)) { - delete $my_tunables{$skip}; - } - - my $role = $self->{role}; - - foreach my $setting ("role", "listen", "pool", sort keys %my_tunables) { - my $attrs = $tunables->{$setting}; - my $value = $self->{$setting}; - - my $check_role = $attrs->{check_role}; - my $check_type = $attrs->{check_type}; - my $default = $attrs->{default}; - my $required = $attrs->{required}; - - next if ($check_role && $check_role ne '*' && $check_role ne $role); - - if ($check_type && $check_type eq 'size') { - $default = $1 if $default =~ /^(\d+)b$/i; - $default = $1 * 1024 if $default =~ /^(\d+)k$/i; - $default = $1 * 1024 * 1024 if $default =~ /^(\d+)m$/i; - } - - if (!$required) { - next unless defined $value; - next if (defined $default && $value eq $default); - } - - if (my $dumper = $attrs->{dumper}) { - $value = $dumper->($self, $value); - } - - if ($check_type && $check_type eq 'bool') { - $value = 'on' if $value; - } - - push @return, "SET $setting = $value"; - } - - my $plugins = $self->{plugins}; - - foreach my $plugin (keys %$plugins) { - local $@; - - my $class = "Perlbal::Plugin::$plugin"; - my $cv = $class->can('dumpconfig'); - - if ($cv) { - eval { push @return, $class->dumpconfig($self) }; - if ($@) { - push @return, "# Plugin '$plugin' threw an exception while being dumped."; - } - } else { - push @return, "# Plugin '$plugin' 
isn't capable of dumping config."; - } - } - - return @return; -} - -# called once a role has been set -sub init { - my Perlbal::Service $self = shift; - die "init called when no role" unless $self->{role}; - - # set all the defaults - for my $param (keys %$tunables) { - my $tun = $tunables->{$param}; - next unless $tun->{check_role} eq "*" || $tun->{check_role} eq $self->{role}; - next unless exists $tun->{default}; - $self->set($param, $tun->{default}); - } -} - -# Service -sub set { - my Perlbal::Service $self = shift; - my ($key, $val, $mc) = @_; - - # if you don't provide an $mc, that better mean you're damn sure it - # won't crash. (end-users never go this route) - $mc ||= Perlbal::ManageCommand->loud_crasher; - - my $set = sub { $self->{$key} = $val; return $mc->ok; }; - - my $pool_set = sub { - # if we don't have a pool, automatically create one named $NAME_pool - unless ($self->{pool}) { - # die if necessary - die "ERROR: Attempt to vivify pool $self->{name}_pool but one or more pools\n" . - " have already been created manually. Please set $key on a\n" . - " previously created pool.\n" unless $Perlbal::vivify_pools; - - # create the pool and ensure that vivify stays on - Perlbal::run_manage_command("CREATE POOL $self->{name}_pool", $mc->out); - Perlbal::run_manage_command("SET $self->{name}.pool = $self->{name}_pool"); - $Perlbal::vivify_pools = 1; - } - - # now we actually do the set - warn "WARNING: '$key' set on service $self->{name} on auto-vivified pool.\n" . - " This behavior is obsolete. This value should be set on a\n" . - " pool object and not on a service.\n" if $Perlbal::vivify_pools; - return $mc->err("No pool defined for service") unless $self->{pool}; - return $self->{pool}->set($key, $val, $mc); - }; - - # this is now handled by Perlbal::Pool, so we pass this set command on - # through in case people try to use it on us like the old method. 
- return $pool_set->() - if $key eq 'nodefile' || - $key eq 'balance_method'; - - my $bool = sub { - my $val = shift; - return 1 if $val =~ /^1|true|on|yes$/i; - return 0 if $val =~ /^0|false|off|no$/i; - return undef; - }; - - if (my $tun = $tunables->{$key}) { - if (my $req_role = $tun->{check_role}) { - return $mc->err("The '$key' option can only be set on a '$req_role' service") - unless ($self->{role}||"") eq $req_role || $req_role eq "*"; - } - - if (my $req_type = $tun->{check_type}) { - if (ref $req_type eq "ARRAY" && $req_type->[0] eq "enum") { - return $mc->err("Value of '$key' must be one of: " . join(", ", @{$req_type->[1]})) - unless grep { $val eq $_ } @{$req_type->[1]}; - } elsif (ref $req_type eq "ARRAY" && $req_type->[0] eq "regexp") { - my $re = $req_type->[1]; - my $emsg = $req_type->[2]; - return $mc->err($emsg) unless $val =~ /$re/; - } elsif (ref $req_type eq "CODE") { - my $emsg = ""; - return $mc->err($emsg) unless $req_type->($self, $val, \$emsg); - } elsif ($req_type eq "bool") { - $val = $bool->($val); - return $mc->err("Expecting boolean value for parameter '$key'") - unless defined $val; - } elsif ($req_type eq "int") { - return $mc->err("Expecting integer value for parameter '$key'") - unless $val =~ /^\d+$/; - } elsif ($req_type eq "size") { - $val = $1 if $val =~ /^(\d+)b$/i; - $val = $1 * 1024 if $val =~ /^(\d+)k$/i; - $val = $1 * 1024 * 1024 if $val =~ /^(\d+)m$/i; - return $mc->err("Expecting size unit value for parameter '$key' in bytes, or suffixed with 'K' or 'M'") - unless $val =~ /^\d+$/; - } elsif ($req_type eq "file") { - return $mc->err("File '$val' not found for '$key'") unless -f $val; - } elsif ($req_type eq "file_or_none") { - return $mc->err("File '$val' not found for '$key'") unless -f $val || $val eq $tun->{default}; - } else { - die "Unknown check_type: $req_type\n"; - } - } - - my $setter = $tun->{setter}; - - if (ref $setter eq "CODE") { - return $setter->($self, $val, $set, $mc); - } elsif 
($tun->{_plugin_inserted}) { - # plugins that add tunables need to be stored in the extra_config hash due to the main object - # using fields. this passthrough is done so the config files don't need to specify this. - $self->{extra_config}->{$key} = $val; - return $mc->ok; - } else { - return $set->(); - } - } - - if ($key eq 'plugins') { - # unload existing plugins - foreach my $plugin (keys %{$self->{plugins}}) { - eval "Perlbal::Plugin::$plugin->unregister(\$self);"; - return $mc->err($@) if $@; - } - - # clear out loaded plugins and hooks - $self->{hooks} = {}; - $self->{plugins} = {}; - $self->{plugin_order} = []; - - # load some plugins - foreach my $plugin (split /[\s,]+/, $val) { - next if $plugin eq 'none'; - - my $fn = Perlbal::plugin_case($plugin); - - next if $self->{plugins}->{$fn}; - unless ($Perlbal::plugins{$fn}) { - $mc->err("Plugin $fn not loaded; not registered for $self->{name}."); - next; - } - - # now register it - eval "Perlbal::Plugin::$fn->register(\$self);"; - return $mc->err($@) if $@; - $self->{plugins}->{$fn} = 1; - push @{$self->{plugin_order}}, $fn; - } - return $mc->ok; - } - - if ($key =~ /^extra\.(.+)$/) { - # set some extra configuration data data - $self->{extra_config}->{$1} = $val; - return $mc->ok; - } - - # see if it happens to be a plugin set command? 
- if ($key =~ /^(.+)\.(.+)$/) { - if (my $coderef = $self->{plugin_setters}->{$1}->{$2}) { - return $coderef->($mc->out, $2, $val); - } - } - - return $mc->err("Unknown service parameter '$key'"); -} - -# CLASS METHOD - -# used by plugins that want to add tunables so that the config file -# can have more options for service settings -sub add_tunable { - my ($name, $hashref) = @_; - return 0 unless $name && $hashref && ref $hashref eq 'HASH'; - return 0 if $tunables->{$name}; - $hashref->{_plugin_inserted} = 1; # mark that a plugin did this - $tunables->{$name} = $hashref; - return 1; -} - -# CLASS METHOD - -# remove a defined tunable, but only if a plugin is what created it -sub remove_tunable { - my $name = shift; - my $tun = $tunables->{$name} or return 0; - return 0 unless $tun->{_plugin_inserted}; - delete $tunables->{$name}; - return 1; -} - -# CLASS METHOD - -# used by plugins to define a new role that services can take on -sub add_role { - my ($role, $creator) = @_; - return 0 unless $role && $creator && ref $creator eq 'CODE'; - return 0 if $PluginRoles{$role}; - $PluginRoles{$role} = $creator; - return 1; -} - -# CLASS METHOD - -# remove a defined plugin role -sub remove_role { - return 0 unless delete $PluginRoles{$_[0]}; - return 1; -} - -# CLASS METHOD - -# returns a defined role creator, if it exists. (undef if it does not) -sub get_role_creator { - return $PluginRoles{$_[0]}; -} - -# run the hooks in a list one by one until one hook returns a true -# value. returns 1 or 0 depending on if any hooks handled the -# request. -sub run_hook { - my Perlbal::Service $self = shift; - my $hook = shift; - if (defined (my $ref = $self->{hooks}->{$hook})) { - # call all the hooks until one returns true - foreach my $hookref (@$ref) { - my $rval = $hookref->[1]->(@_); - return 1 if $rval; - } - } - return 0; -} - -# run a bunch of hooks in this service, always returns undef. 
-sub run_hooks { - my Perlbal::Service $self = shift; - my $hook = shift; - if (defined (my $ref = $self->{hooks}->{$hook})) { - # call all the hooks - $_->[1]->(@_) foreach @$ref; - } - return undef; -} - -# define a hook for this service -sub register_hook { - my Perlbal::Service $self = shift; - my ($pclass, $hook, $ref) = @_; - push @{$self->{hooks}->{$hook} ||= []}, [ $pclass, $ref ]; - return 1; -} - -# remove hooks we have defined -sub unregister_hook { - my Perlbal::Service $self = shift; - my ($pclass, $hook) = @_; - if (defined (my $refs = $self->{hooks}->{$hook})) { - my @new; - foreach my $ref (@$refs) { - # fill @new with hooks that DON'T match - push @new, $ref - unless $ref->[0] eq $pclass; - } - $self->{hooks}->{$hook} = \@new; - return 1; - } - return undef; -} - -# remove all hooks of a certain class -sub unregister_hooks { - my Perlbal::Service $self = shift; - foreach my $hook (keys %{$self->{hooks}}) { - # call unregister_hook with this hook name - $self->unregister_hook($_[0], $hook); - } -} - -# register a value setter for plugin configuration -sub register_setter { - my Perlbal::Service $self = shift; - my ($pclass, $key, $coderef) = @_; - return unless $pclass && $key && $coderef; - $self->{plugin_setters}->{lc $pclass}->{lc $key} = $coderef; -} - -# remove a setter -sub unregister_setter { - my Perlbal::Service $self = shift; - my ($pclass, $key) = @_; - return unless $pclass && $key; - delete $self->{plugin_setters}->{lc $pclass}->{lc $key}; -} - -# remove a bunch of setters -sub unregister_setters { - my Perlbal::Service $self = shift; - my $pclass = shift; - return unless $pclass; - delete $self->{plugin_setters}->{lc $pclass}; -} - -# take a backend we've created and mark it as pending if we do not -# have another pending backend connection in this slot -sub add_pending_connect { - my Perlbal::Service $self = shift; - my Perlbal::BackendHTTP $be = shift; - - # error if we already have a pending connection for this ipport - if (defined 
$self->{pending_connects}{$be->{ipport}}) { - Perlbal::log('warning', "Warning: attempting to spawn backend connection that already existed."); - - # now dump a backtrace so we know how we got here - my $depth = 0; - while (my ($package, $filename, $line, $subroutine) = caller($depth++)) { - Perlbal::log('warning', " -- [$filename:$line] $package::$subroutine"); - } - - # we're done now, just return - return; - } - - # set this connection up in the pending connection list - $self->{pending_connects}{$be->{ipport}} = $be; - $self->{pending_connect_count}++; -} - -# remove a backend connection from the pending connect list if and only -# if it is the actual connection contained in the list; prevent double -# decrementing on accident -sub clear_pending_connect { - my Perlbal::Service $self = shift; - my Perlbal::BackendHTTP $be = shift; - if (defined $self->{pending_connects}{$be->{ipport}} && defined $be && - $self->{pending_connects}{$be->{ipport}} == $be) { - $self->{pending_connects}{$be->{ipport}} = undef; - $self->{pending_connect_count}--; - } -} - -# called by BackendHTTP when it's closed by any means -sub note_backend_close { - my Perlbal::Service $self = shift; - my Perlbal::BackendHTTP $be = shift; - $self->clear_pending_connect($be); - $self->spawn_backends; -} - -# called by ClientProxy when it dies. 
-sub note_client_close { - my Perlbal::Service $self; - my Perlbal::ClientProxy $cp; - ($self, $cp) = @_; - - if (delete $self->{waiting_client_map}{$cp->{fd}}) { - $self->{waiting_client_count}--; - } -} - -sub mark_node_used { - my Perlbal::Service $self = $_[0]; - $self->{pool}->mark_node_used($_[1]) if $self->{pool}; -} - -sub get_client { - my Perlbal::Service $self = shift; - - my $ret = sub { - my Perlbal::ClientProxy $cp = shift; - $self->{waiting_client_count}--; - delete $self->{waiting_client_map}{$cp->{fd}}; - - # before we return, start another round of connections - $self->spawn_backends; - - return $cp; - }; - - # determine if we should jump straight to the high priority queue or - # act as pressure relief on the standard queue - my $hp_first = 1; - if (($self->{queue_relief_size} > 0) && - (scalar(@{$self->{waiting_clients}}) >= $self->{queue_relief_size})) { - # if we're below the chance level, take a standard queue item - $hp_first = 0 - if rand(100) < $self->{queue_relief_chance}; - } - - # find a high-priority client, or a regular one - my Perlbal::ClientProxy $cp; - while ($hp_first && ($cp = shift @{$self->{waiting_clients_highpri}})) { - next if $cp->{closed}; - if (Perlbal::DEBUG >= 2) { - my $backlog = scalar @{$self->{waiting_clients}}; - print "Got from fast queue, in front of $backlog others\n"; - } - return $ret->($cp); - } - - # regular clients: - while ($cp = shift @{$self->{waiting_clients}}) { - next if $cp->{closed}; - print "Backend requesting client, got normal = $cp->{fd}.\n" if Perlbal::DEBUG >= 2; - return $ret->($cp); - } - - # low-priority (batch/idle) clients. 
- while ($cp = shift @{$self->{waiting_clients_lowpri}}) { - next if $cp->{closed}; - print "Backend requesting client, got low priority = $cp->{fd}.\n" if Perlbal::DEBUG >= 2; - return $ret->($cp); - } - - return undef; -} - -# given a backend, verify it's generation -sub verify_generation { - my Perlbal::Service $self = $_[0]; - my Perlbal::BackendHTTP $be = $_[1]; - - # fast cases: generation count matches, so we just return an 'okay!' flag - return 1 if $self->{generation} == $be->generation; - - # if our current pool knows about this ip:port, then we can still use it - if (defined $self->{pool}->node_used($be->ipport)) { - # so we know this is good, in the future we just want to hit the fast case - # and continue, so let's update the generation - $be->generation($self->{generation}); - return 1; - } - - # if we get here, the backend should be closed - $be->close('invalid_generation'); - return 0; -} - -# called by backend connection after it becomes writable -sub register_boredom { - my Perlbal::Service $self; - my Perlbal::BackendHTTP $be; - ($self, $be) = @_; - - # note that this backend is no longer pending a connect, - # if we thought it was before. but not if it's a persistent - # connection asking to be re-used. - unless ($be->{use_count}) { - $self->clear_pending_connect($be); - } - - # it is possible that this backend is part of a different pool that we're - # no longer using... if that's the case, we want to close it - return unless $self->verify_generation($be); - - # now try to fetch a client for it - my Perlbal::ClientProxy $cp = $self->get_client; - if ($cp) { - return if $be->assign_client($cp); - - # don't want to lose client, so we (unfortunately) - # stick it at the end of the waiting queue. - # fortunately, assign_client shouldn't ever fail. 
- $self->request_backend_connection($cp); - } - - # don't hang onto more bored, persistent connections than - # has been configured for connect-ahead - if ($be->{use_count}) { - my $current_bored = scalar @{$self->{bored_backends}}; - if ($current_bored >= $self->{backend_persist_cache}) { - $be->close('too_many_bored'); - return; - } - } - - # put backends which are known to be bound to processes - # and not to TCP stacks at the beginning where they'll - # be used first - if ($be->{has_attention}) { - unshift @{$self->{bored_backends}}, $be; - } else { - push @{$self->{bored_backends}}, $be; - } -} - -sub note_bad_backend_connect { - my Perlbal::Service $self = shift; - my Perlbal::BackendHTTP $be = shift; - my $retry_time = shift(); - - # clear this pending connection - $self->clear_pending_connect($be); - - # mark this host as dead for a while if we need to - if (defined $retry_time && $retry_time > 0) { - # we don't want other spawn_backends calls to retry - $self->{backend_no_spawn}->{$be->{ipport}} = 1; - - # and now we set a callback to ensure we're kicked at the right time - Perlbal::Socket::register_callback($retry_time, sub { - delete $self->{backend_no_spawn}->{$be->{ipport}}; - $self->spawn_backends; - }); - } - - # FIXME: do something interesting (tell load balancer about dead host, - # and fire up a new connection, if warranted) - - # makes a new connection, if needed - $self->spawn_backends; -} - -sub request_backend_connection { # : void - my Perlbal::Service $self; - my Perlbal::ClientProxy $cp; - ($self, $cp) = @_; - - return unless $cp && ! $cp->{closed}; - - my $hi_pri = $cp->{high_priority}; # load values from the client proxy object - my $low_pri = $cp->{low_priority}; # they are initialized as 0 during object creation, but hooks can override them - - # is there a defined high-priority cookie? 
- if (my $cname = $self->{high_priority_cookie}) { - # decide what priority class this request is in - my $hd = $cp->{req_headers}; - my %cookie; - foreach (split(/;\s+/, $hd->header("Cookie") || '')) { - next unless ($_ =~ /(.*)=(.*)/); - $cookie{Perlbal::Util::durl($1)} = Perlbal::Util::durl($2); - } - my $hicookie = $cookie{$cname} || ""; - $hi_pri = index($hicookie, $self->{high_priority_cookie_contents}) != -1; - } - - # now, call hook to see if this should be high priority - $hi_pri = $self->run_hook('make_high_priority', $cp) - unless $hi_pri; # only if it's not already - - # and then, call hook to see about low priority - $low_pri = $self->run_hook('make_low_priority', $cp) - unless $hi_pri || $low_pri; # only if it's not high or low already - - $cp->{high_priority} = 1 if $hi_pri; - $cp->{low_priority} = 1 if $low_pri; - - # before we even consider spawning backends, let's see if we have - # some bored (pre-connected) backends that'd take this client - my Perlbal::BackendHTTP $be; - my $now = time; - while ($be = shift @{$self->{bored_backends}}) { - next if $be->{closed}; - - # now make sure that it's still in our pool, and if not, close it - next unless $self->verify_generation($be); - - # don't use connect-ahead connections when we haven't - # verified we have their attention - if (! 
$be->{has_attention} && $be->{create_time} < $now - 5) { - $be->close("too_old_bored"); - next; - } - - # don't use keep-alive connections if we know the server's - # just about to kill the connection for being idle - if ($be->{disconnect_at} && $now + 2 > $be->{disconnect_at}) { - $be->close("too_close_disconnect"); - next; - } - - # give the backend this client - if ($be->assign_client($cp)) { - # and make some extra bored backends, if configured as such - $self->spawn_backends; - return; - } - - # assign client can end up closing the connection, so check for that - return if $cp->{closed}; - } - - if ($hi_pri) { - push @{$self->{waiting_clients_highpri}}, $cp; - } elsif ($low_pri) { - push @{$self->{waiting_clients_lowpri}}, $cp; - } else { - push @{$self->{waiting_clients}}, $cp; - } - - $self->{waiting_client_count}++; - $self->{waiting_client_map}{$cp->{fd}} = 1; - - $self->spawn_backends; -} - -# sees if it should spawn one or more backend connections -sub spawn_backends { - my Perlbal::Service $self = shift; - - # check our lock and set it if we can - return if $self->{spawn_lock}; - $self->{spawn_lock} = 1; - - # sanity checks on our bookkeeping - if ($self->{pending_connect_count} < 0) { - Perlbal::log('crit', "Bogus: service $self->{name} has pending connect ". - "count of $self->{pending_connect_count}?! Resetting."); - $self->{pending_connect_count} = scalar - map { $_ && ! $_->{closed} } values %{$self->{pending_connects}}; - } - - # keep track of the sum of existing_bored + bored_created - my $backends_created = scalar(@{$self->{bored_backends}}) + $self->{pending_connect_count}; - my $backends_needed = $self->{waiting_client_count} + $self->{connect_ahead}; - my $to_create = $backends_needed - $backends_created; - - my $pool = $self->{pool}; - - # can't create more than this, assuming one pending connect per node - my $max_creatable = $pool ? 
($self->{pool}->node_count - $self->{pending_connect_count}) : 1; - $to_create = $max_creatable if $to_create > $max_creatable; - - # cap number of attempted connects at once - $to_create = 10 if $to_create > 10; - - my $now = time; - - while ($to_create > 0) { - $to_create--; - - # spawn processes if not a pool, else whine. - unless ($pool) { - if (my $sp = $self->{server_process}) { - warn "To create = $to_create...\n"; - warn " spawning $sp\n"; - my $be = Perlbal::BackendHTTP->new_process($self, $sp); - return; - } - warn "No pool! Can't spawn backends.\n"; - return; - } - - my ($ip, $port) = $self->{pool}->get_backend_endpoint; - unless ($ip) { - Perlbal::log('crit', "No backend IP for service $self->{name}"); - # FIXME: register desperate flag, so load-balancer module can callback when it has a node - $self->{spawn_lock} = 0; - return; - } - - # handle retry timeouts so we don't spin - next if $self->{backend_no_spawn}->{"$ip:$port"}; - - # if it's pending, verify the pending one is still valid - if (my Perlbal::BackendHTTP $be = $self->{pending_connects}{"$ip:$port"}) { - my $age = $now - $be->{create_time}; - if ($age >= 5 && $be->{state} eq "connecting") { - $be->close('connect_timeout'); - } elsif ($age >= 60 && $be->{state} eq "verifying_backend") { - # after 60 seconds of attempting to verify, we're probably already dead - $be->close('verify_timeout'); - } elsif (! 
$be->{closed}) { - next; - } - } - - # now actually spawn a backend and add it to our pending list - if (my $be = Perlbal::BackendHTTP->new($self, $ip, $port, { pool => $self->{pool} })) { - $self->add_pending_connect($be); - } - } - - # clear our spawn lock - $self->{spawn_lock} = 0; -} - -# getter only -sub role { - my Perlbal::Service $self = shift; - return $self->{role}; -} - -# called by BackendHTTP to ask if a client's IP is in our trusted list -sub trusted_ip { - my Perlbal::Service $self = shift; - my $ip = shift; - - return 1 if $self->{'always_trusted'}; - - my $tmap = $self->{trusted_upstream_proxies}; - return 0 unless $tmap; - - # try to use it as a Net::Netmask object - return 1 if eval { $tmap->match($ip); }; - return 0; -} - -# manage some header stuff -sub header_management { - my Perlbal::Service $self = shift; - my ($mode, $key, $val, $mc) = @_; - return $mc->err("no header provided") unless $key; - return $mc->err("no value provided") unless $val || $mode eq 'remove'; - return $mc->err("only valid on reverse_proxy services") unless $self->{role} eq 'reverse_proxy'; - - if ($mode eq 'insert') { - push @{$self->{extra_headers}->{insert}}, [ $key, $val ]; - } elsif ($mode eq 'remove') { - push @{$self->{extra_headers}->{remove}}, $key; - } - return $mc->ok; -} - -sub munge_headers { - my Perlbal::Service $self = $_[0]; - my Perlbal::HTTPHeaders $hdrs = $_[1]; - - # handle removals first - foreach my $hdr (@{$self->{extra_headers}->{remove}}) { - $hdrs->header($hdr, undef); - } - - # and now insertions - foreach my $hdr (@{$self->{extra_headers}->{insert}}) { - $hdrs->header($hdr->[0], $hdr->[1]); - } -} - -# getter/setter -sub selector { - my Perlbal::Service $self = shift; - if (@_) { - my $ref = shift; - $self->{selector} = sub { - my $cb = shift; - - # try to give it to our defined selector - my $res = $ref->($cb); - - # if that failed and we have a default, then give it to them - if (!$res && $self->{default_service}) { - 
$self->{default_service}->adopt_base_client($cb); - return 1; - } - - return $res; - }; - } - return $self->{selector}; -} - -# given a base client from a 'selector' role, down-cast it to its specific type -sub adopt_base_client { - my Perlbal::Service $self = shift; - my Perlbal::ClientHTTPBase $cb = shift; - - $cb->{service} = $self; - - if ($self->{'role'} eq "web_server") { - Perlbal::ClientHTTP->new_from_base($cb); - return; - } elsif ($self->{'role'} eq "reverse_proxy") { - Perlbal::ClientProxy->new_from_base($cb); - return; - } elsif ($self->{'role'} eq "selector") { - $self->selector()->($cb); - return; - } else { - $cb->_simple_response(500, "Can't map to service type $self->{'role'}"); - } -} - -# turn a ClientProxy or ClientHTTP back into a generic base client -# (for a service-selector role) -sub return_to_base { - my Perlbal::Service $self = shift; - my Perlbal::ClientHTTPBase $cb = shift; # actually a subclass of Perlbal::ClientHTTPBase - - $cb->{service} = $self; - Perlbal::Util::rebless($cb, "Perlbal::ClientHTTPBase"); - - # the read/watch events are reset by ClientHTTPBase's http_response_sent (our caller) -} - -# Service -sub enable { - my Perlbal::Service $self; - my $mc; - - ($self, $mc) = @_; - - if ($self->{enabled}) { - $mc && $mc->err("service $self->{name} is already enabled"); - return 0; - } - - my $listener; - - # create UDP upload tracker listener - if ($self->{role} eq "upload_tracker") { - $listener = Perlbal::UploadListener->new($self->{listen}, $self); - } - - # create TCP listening socket - if (! $listener && $self->{listen}) { - my $opts = {}; - if ($self->{enable_ssl}) { - $opts->{ssl} = { - SSL_key_file => $self->{ssl_key_file}, - SSL_cert_file => $self->{ssl_cert_file}, - SSL_cipher_list => $self->{ssl_cipher_list}, - }; - return $mc->err("IO::Socket:SSL (0.97+) not available. 
Can't do SSL.") unless eval "use IO::Socket::SSL 0.97 (); 1;"; - return $mc->err("SSL key file ($self->{ssl_key_file}) doesn't exist") unless -f $self->{ssl_key_file}; - return $mc->err("SSL cert file ($self->{ssl_cert_file}) doesn't exist") unless -f $self->{ssl_cert_file}; - } - - my $tl = Perlbal::TCPListener->new($self->{listen}, $self, $opts); - unless ($tl) { - $mc && $mc->err("Can't start service '$self->{name}' on $self->{listen}: $Perlbal::last_error"); - return 0; - } - $listener = $tl; - } - - $self->{listener} = $listener; - $self->{enabled} = 1; - return $mc ? $mc->ok : 1; -} - -# Service -sub disable { - my Perlbal::Service $self; - my ($mc, $force); - - ($self, $mc, $force) = @_; - - if (! $self->{enabled}) { - $mc && $mc->err("service $self->{name} is already disabled"); - return 0; - } - if ($self->{role} eq "management" && ! $force) { - $mc && $mc->err("can't disable management service"); - return 0; - } - - # find listening socket - my $tl = $self->{listener}; - $tl->close if $tl; - $self->{listener} = undef; - $self->{enabled} = 0; - return $mc ? $mc->ok : 1; -} - -sub stats_info -{ - my Perlbal::Service $self = shift; - my $out = shift; - my $now = time; - - $out->("SERVICE $self->{name}"); - $out->(" listening: " . ($self->{listen} || "--")); - $out->(" role: $self->{role}"); - if ($self->{role} eq "reverse_proxy" || - $self->{role} eq "web_server") { - $out->(" pend clients: $self->{waiting_client_count}"); - $out->(" pend backend: $self->{pending_connect_count}"); - foreach my $ipport (sort keys %{$self->{pending_connects}}) { - my $be = $self->{pending_connects}{$ipport}; - next unless $be; - my $age = $now - $be->{create_time}; - $out->(" $ipport - " . ($be->{closed} ? "(closed)" : $be->{state}) . 
" - ${age}s"); - } - } - if ($self->{role} eq "reverse_proxy") { - if ($self->{reproxy_cache}) { - my $hits = $self->{_stat_cache_hits} || 0; - my $hit_rate = sprintf("%0.02f%%", eval { $hits / ($self->{_stat_requests} || 0) * 100 } || 0); - - my $size = eval { $self->{reproxy_cache}->size }; - $size = defined($size) ? $size : 'undef'; - - my $maxsize = eval { $self->{reproxy_cache}->maxsize }; - $maxsize = defined ($maxsize) ? $maxsize : 'undef'; - - my $sizepercent = eval { sprintf("%0.02f%%", $size / $maxsize * 100) } || 'undef'; - - $out->(" cache size: $size/$maxsize ($sizepercent)"); - $out->(" cache hits: $hits"); - $out->("cache hit rate: $hit_rate"); - } - - my $bored_count = scalar @{$self->{bored_backends}}; - $out->(" connect-ahead: $bored_count/$self->{connect_ahead}"); - if ($self->{pool}) { - $out->(" pool: " . $self->{pool}->name); - $out->(" nodes:"); - foreach my $n (@{ $self->{pool}->nodes }) { - my $hostport = "$n->[0]:$n->[1]"; - $out->(sprintf(" %-21s %7d", $hostport, $self->{pool}->node_used($hostport) || 0)); - } - } - } elsif ($self->{role} eq "web_server") { - $out->(" docroot: $self->{docroot}"); - } -} - -# simple passthroughs to the run_hook mechanism. part of the reportto interface. -sub backend_response_received { - return $_[0]->run_hook('backend_response_received', $_[1]); -} - -# just a getter for our name -sub name { - my Perlbal::Service $self = $_[0]; - return $self->{name}; -} - -sub listenaddr { - my Perlbal::Service $self = $_[0]; - return $self->{listen}; -} - -sub reproxy_cache { - my Perlbal::Service $self = $_[0]; - return $self->{reproxy_cache}; -} - -sub add_to_reproxy_url_cache { - my Perlbal::Service $self; - my ($reqhd, $reshd); - - ($self, $reqhd, $reshd) = @_; - - # is caching enabled on this service? 
- my $cache = $self->{reproxy_cache} or - return 0; - - # these should always be set anyway, from BackendHTTP: - my $reproxy_cache_for = $reshd->header('X-REPROXY-CACHE-FOR') or return 0; - my $urls = $reshd->header('X-REPROXY-URL') or return 0; - - my ($timeout_delta, $cache_headers) = split ';', $reproxy_cache_for, 2; - my $timeout = $timeout_delta ? time() + $timeout_delta : undef; - - my $hostname = $reqhd->header("Host") || ''; - my $requri = $reqhd->request_uri || ''; - my $key = "$hostname|$requri"; - - my @headers; - foreach my $header (split /\s+/, $cache_headers) { - my $value; - next unless $header && ($value = $reshd->header($header)); - $value = _ref_to($value) if uc($header) eq 'CONTENT-TYPE'; - push @headers, _ref_to($header), $value; - } - - $cache->set($key, [$timeout, \@headers, $urls]); -} - -# given a string, return a shared reference to that string. to save -# memory when lots of same string is stored. -my %refs; -sub _ref_to { - my $key = shift; - return $refs{$key} || ($refs{$key} = \$key); -} - -1; - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/Socket.pm b/lib/mogdeps/Perlbal/Socket.pm deleted file mode 100644 index 1a5d5a26..00000000 --- a/lib/mogdeps/Perlbal/Socket.pm +++ /dev/null @@ -1,390 +0,0 @@ -# Base class for all socket types -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. - -package Perlbal::Socket; -use strict; -use warnings; -no warnings qw(deprecated); - -use Perlbal::HTTPHeaders; - -use Sys::Syscall; -use POSIX (); - -use Danga::Socket 1.44; -use base 'Danga::Socket'; - -use fields ( - 'headers_string', # headers as they're being read - - 'req_headers', # the final Perlbal::HTTPHeaders object inbound - 'res_headers', # response headers outbound (Perlbal::HTTPHeaders object) - - 'create_time', # creation time - 'alive_time', # last time noted alive - 'state', # general purpose state; used by descendants. 
- 'do_die', # if on, die and do no further requests - - 'read_buf', # arrayref of scalarref read from client - 'read_ahead', # bytes sitting in read_buf - 'read_size', # total bytes read from client, ever - - 'ditch_leading_rn', # if true, the next header parsing will ignore a leading \r\n - - 'observed_ip_string', # if defined, contains the observed IP string of the peer - # we're serving. this is intended for hoding the value of - # the X-Forwarded-For and using it to govern ACLs. - ); - -use constant MAX_HTTP_HEADER_LENGTH => 102400; # 100k, arbitrary - -use constant TRACK_OBJECTS => 0; # see @created_objects below -if (TRACK_OBJECTS) { - use Scalar::Util qw(weaken isweak); -} - -# kick-off one cleanup -_do_cleanup(); - -our %state_changes = (); # { "objref" => [ state, state, state, ... ] } -our $last_callbacks = 0; # time last ran callbacks -our $callbacks = []; # [ [ time, subref ], [ time, subref ], ... ] - -# this one deserves its own section. we keep track of every Perlbal::Socket object -# created if the TRACK_OBJECTS constant is on. we use weakened references, though, -# so this list will hopefully contain mostly undefs. users can ask for this list if -# they want to work with it via the get_created_objects_ref function. -our @created_objects; # ( $ref, $ref, $ref ... ) -our $last_co_cleanup = 0; # clean the list every few seconds - -sub get_statechange_ref { - return \%state_changes; -} - -sub get_created_objects_ref { - return \@created_objects; -} - -sub write_debuggy { - my $self = shift; - - my $cref = $_[0]; - my $content = ref $cref eq "SCALAR" ? $$cref : $cref; - my $clen = defined $content ? length($content) : "undef"; - $content = substr($content, 0, 17) . "..." 
if defined $content && $clen > 30; - my ($pkg, $filename, $line) = caller; - print "write($self, <$clen>\"$content\") from ($pkg, $filename, $line)\n" if Perlbal::DEBUG >= 4; - $self->SUPER::write(@_); -} - -if (Perlbal::DEBUG >= 4) { - *write = \&write_debuggy; -} - -sub new { - my Perlbal::Socket $self = shift; - $self = fields::new( $self ) unless ref $self; - - Perlbal::objctor($self); - - $self->SUPER::new( @_ ); - $self->{headers_string} = ''; - $self->{state} = undef; - $self->{do_die} = 0; - - $self->{read_buf} = []; # arrayref of scalar refs of bufs read from client - $self->{read_ahead} = 0; # bytes sitting in read_buf - $self->{read_size} = 0; # total bytes read from client - - my $now = time; - $self->{alive_time} = $self->{create_time} = $now; - - # now put this item in the list of created objects - if (TRACK_OBJECTS) { - # clean the created objects list if necessary - if ($last_co_cleanup < $now - 5) { - # remove out undefs, because those are natural byproducts of weakening - # references - @created_objects = grep { $_ } @created_objects; - - # however, the grep turned our weak references back into strong ones, so - # we have to re-weaken them - weaken($_) foreach @created_objects; - - # we've cleaned up at this point - $last_co_cleanup = $now; - } - - # now add this one to our cleaned list and weaken it - push @created_objects, $self; - weaken($created_objects[-1]); - } - - return $self; -} - -# FIXME: this doesn't scale in theory, but it might use less CPU in -# practice than using the Heap:: modules and manipulating the -# expirations all the time, thus doing things properly -# algorithmically. and this is definitely less work, so it's worth -# a try. 
-sub _do_cleanup { - my $sf = Perlbal::Socket->get_sock_ref; - - my $now = time; - - my @to_close; - while (my $k = each %$sf) { - my Perlbal::Socket $v = $sf->{$k}; - - my $max_age = eval { $v->max_idle_time } || 0; - next unless $max_age; - - if ($v->{alive_time} < $now - $max_age) { - push @to_close, $v; - } - } - - foreach my $sock (@to_close) { - $sock->close("perlbal_timeout") - } - - Danga::Socket->AddTimer(5, \&_do_cleanup); -} - -# CLASS METHOD: given a delay (in seconds) and a subref, this will call -# that subref in AT LEAST delay seconds. if the subref returns 0, the -# callback is discarded, but if it returns a positive number, the callback -# is pushed onto the callback stack to be called again in at least that -# many seconds. -sub register_callback { - # adds a new callback to our list - my ($delay, $subref) = @_; - push @$callbacks, [ time + $delay, $subref ]; - return 1; -} - -# CLASS METHOD: runs through the list of registered callbacks and executes -# any that need to be executed -# FIXME: this doesn't scale. need a heap. -sub run_callbacks { - my $now = time; - return if $last_callbacks == $now; - $last_callbacks = $now; - - my @destlist = (); - foreach my $ref (@$callbacks) { - # if their time is <= now... - if ($ref->[0] <= $now) { - # find out if they want to run again... - my $rv = $ref->[1]->(); - - # and if they do, push onto list... - push @destlist, [ $rv + $now, $ref->[1] ] - if defined $rv && $rv > 0; - } else { - # not time for this one, just shove it - push @destlist, $ref; - } - } - $callbacks = \@destlist; -} - -# CLASS METHOD: -# default is for sockets to never time out. classes -# can override. 
-sub max_idle_time { 0; } - -# Socket: specific to HTTP socket types (only here and not in -# ClientHTTPBase because ClientManage wants it too) -sub read_request_headers { read_headers($_[0], 0); } -sub read_response_headers { read_headers($_[0], 1); } -sub read_headers { - my Perlbal::Socket $self = shift; - my $is_res = shift; - print "Perlbal::Socket::read_headers($self) is_res=$is_res\n" if Perlbal::DEBUG >= 2; - - my $sock = $self->{sock}; - - my $to_read = MAX_HTTP_HEADER_LENGTH - length($self->{headers_string}); - - my $bref = $self->read($to_read); - unless (defined $bref) { - # client disconnected - print " client disconnected\n" if Perlbal::DEBUG >= 3; - return $self->close('remote_closure'); - } - - $self->{headers_string} .= $$bref; - my $idx = index($self->{headers_string}, "\r\n\r\n"); - my $delim_len = 4; - - # can't find the header delimiter? check for LFLF header delimiter. - if ($idx == -1) { - $idx = index($self->{headers_string}, "\n\n"); - $delim_len = 2; - } - # still can't find the header delimiter? - if ($idx == -1) { - - # usually we get the headers all in one packet (one event), so - # if we get in here, that means it's more than likely the - # extra \r\n and if we clean it now (throw it away), then we - # can avoid a regexp later on. 
- if ($self->{ditch_leading_rn} && $self->{headers_string} eq "\r\n") { - print " throwing away leading \\r\\n\n" if Perlbal::DEBUG >= 3; - $self->{ditch_leading_rn} = 0; - $self->{headers_string} = ""; - return 0; - } - - print " can't find end of headers\n" if Perlbal::DEBUG >= 3; - $self->close('long_headers') - if length($self->{headers_string}) >= MAX_HTTP_HEADER_LENGTH; - return 0; - } - - my $hstr = substr($self->{headers_string}, 0, $idx); - print " pre-parsed headers: [$hstr]\n" if Perlbal::DEBUG >= 3; - - my $extra = substr($self->{headers_string}, $idx+$delim_len); - if (my $len = length($extra)) { - print " pushing back $len bytes after header\n" if Perlbal::DEBUG >= 3; - $self->push_back_read(\$extra); - } - - # some browsers send an extra \r\n after their POST bodies that isn't - # in their content-length. a base class can tell us when they're - # on their 2nd+ request after a POST and tell us to be ready for that - # condition, and we'll clean it up - $hstr =~ s/^\r\n// if $self->{ditch_leading_rn}; - - unless (($is_res ? $self->{res_headers} : $self->{req_headers}) = - Perlbal::HTTPHeaders->new(\$hstr, $is_res)) { - # bogus headers? close connection. - print " bogus headers\n" if Perlbal::DEBUG >= 3; - return $self->close("parse_header_failure"); - } - - print " got valid headers\n" if Perlbal::DEBUG >= 3; - - $Perlbal::reqs++ unless $is_res; - $self->{ditch_leading_rn} = 0; - - return $is_res ? $self->{res_headers} : $self->{req_headers}; -} - -### METHOD: drain_read_buf_to( $destination ) -### Write read-buffered data (if any) from the receiving object to the -### I object. 
-sub drain_read_buf_to { - my ($self, $dest) = @_; - return unless $self->{read_ahead}; - - while (my $bref = shift @{$self->{read_buf}}) { - print "draining readbuf from $self to $dest: [$$bref]\n" if Perlbal::DEBUG >= 3; - $dest->write($bref); - $self->{read_ahead} -= length($$bref); - } -} - -### METHOD: die_gracefully() -### By default, if we're in persist_wait state, close. Else, ignore. Children -### can override if they want to do some other processing. -sub die_gracefully { - my Perlbal::Socket $self = $_[0]; - if ($self->state eq 'persist_wait') { - $self->close('graceful_shutdown'); - } - $self->{do_die} = 1; -} - -### METHOD: write() -### Overridden from Danga::Socket to update our alive time on successful writes -### Stops sockets from being closed on long-running write operations -sub write { - my $self = shift; - - my $ret; - if ($ret = $self->SUPER::write(@_)) { - # Mark this socket alive so we don't time out - $self->{alive_time} = $Perlbal::tick_time; - } - - return $ret; -} - -### METHOD: close() -### Set our state when we get closed. -sub close { - my Perlbal::Socket $self = $_[0]; - $self->state('closed'); - return $self->SUPER::close($_[1]); -} - -### METHOD: state() -### If you pass a parameter, sets the state, else returns it. -sub state { - my Perlbal::Socket $self = shift; - return $self->{state} unless @_; - - push @{$state_changes{"$self"} ||= []}, $_[0] if Perlbal::TRACK_STATES; - return $self->{state} = $_[0]; -} - -sub observed_ip_string { - my Perlbal::Socket $self = shift; - - if (@_) { - return $self->{observed_ip_string} = $_[0]; - } else { - return $self->{observed_ip_string}; - } -} - -sub as_string_html { - my Perlbal::Socket $self = shift; - return $self->SUPER::as_string; -} - -sub DESTROY { - my Perlbal::Socket $self = shift; - delete $state_changes{"$self"} if Perlbal::TRACK_STATES; - Perlbal::objdtor($self); -} - -# package function (not a method). returns bytes sent, or -1 on error. 
-our $sf_defined = Sys::Syscall::sendfile_defined; -our $max_sf_readwrite = 128 * 1024; -sub sendfile { - my ($sfd, $fd, $bytes) = @_; - return Sys::Syscall::sendfile($sfd, $fd, $bytes) if $sf_defined; - - # no support for sendfile. ghetto version: read and write. - my $buf; - $bytes = $max_sf_readwrite if $bytes > $max_sf_readwrite; - - my $rv = POSIX::read($fd, $buf, $bytes); - return -1 unless defined $rv; - return -1 unless $rv == $bytes; - - my $wv = POSIX::write($sfd, $buf, $rv); - return -1 unless defined $wv; - - if (my $over_read = $rv - $wv) { - POSIX::lseek($fd, -$over_read, &POSIX::SEEK_CUR); - } - - return $wv; -} - -1; - - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/SocketSSL.pm b/lib/mogdeps/Perlbal/SocketSSL.pm deleted file mode 100644 index f868c251..00000000 --- a/lib/mogdeps/Perlbal/SocketSSL.pm +++ /dev/null @@ -1,135 +0,0 @@ -# Base class for SSL sockets. -# -# This is a simple class that extends Danga::Socket and contains an IO::Socket::SSL -# for the purpose of allowing non-blocking SSL in Perlbal. -# -# WARNING: this code will break IO::Socket::SSL if you use it in any plugins or -# have custom Perlbal modifications that use it. you will run into issues. This -# is because we override the close method to prevent premature closure of the socket, -# so you will end up with the socket not closing properly. -# -# Copyright 2007, Mark Smith . -# -# This file is licensed under the same terms as Perl itself. - -package Perlbal::SocketSSL; - -use strict; -use warnings; -no warnings qw(deprecated); - -use Danga::Socket 1.44; -use IO::Socket::SSL 0.98; -use Errno qw( EAGAIN ); - -use base 'Danga::Socket'; -use fields qw( listener create_time ); - -# magic IO::Socket::SSL crap to make it play nice with us -{ - no strict 'refs'; - no warnings 'redefine'; - - # replace IO::Socket::SSL::close with our own code... 
- my $orig = *IO::Socket::SSL::close{CODE}; - *IO::Socket::SSL::close = sub { - my $self = shift() - or return IO::Socket::SSL::_invalid_object(); - - # if we have args, close ourselves (second call!), else don't - if (exists ${*$self}->{__close_args}) { - $orig->($self, @{${*$self}->{__close_args}}); - } else { - ${*$self}->{__close_args} = [ @_ ]; - ${*$self}->{_danga_socket}->close('intercepted_ssl_close'); - } - }; -} - -# called: CLASS->new( $sock, $tcplistener ) -sub new { - my Perlbal::SocketSSL $self = shift; - $self = fields::new( $self ) unless ref $self; - - Perlbal::objctor($self); - - my ($sock, $listener) = @_; - - ${*$sock}->{_danga_socket} = $self; - $self->{listener} = $listener; - $self->{create_time} = time; - - $self->SUPER::new($sock); - - # TODO: would be good to have an overall timeout so that we can - # kill sockets that are open and just sitting there. "ssl_handshake_timeout" - # or something like that... - - return $self; -} - -# this is nonblocking, it attempts to setup SSL and if it can't then -# it returns whether it needs to read or write. we then setup to wait -# for the event it indicates and then wait. when that event fires, we -# call down again, and repeat the process until we have setup the -# SSL connection. -sub try_accept { - my Perlbal::SocketSSL $self = shift; - - my $sock = $self->{sock}->accept_SSL; - - if (defined $sock) { - # looks like we got it! let's steal it from ourselves - # so Danga::Socket gives up on it and we can send - # it out to someone else. (we discard the return value - # as we already have it in $sock) - # - # of course, life isn't as simple as that. we have to do - # some trickery with the ordering here to ensure that we - # don't setup the new class until after the Perlbal::SocketSSL - # goes away according to Danga::Socket. 
- # - # if we don't do it this way, we get nasty errors because - # we (this object) still exists in the DescriptorMap of - # Danga::Socket when the new Perlbal::ClientXX tries to - # insert itself there. - - # removes us from the active polling, closes up shop, but - # save our fd first! - my $fd = $self->{fd}; - $self->steal_socket; - - # finish blowing us away - my $ref = Danga::Socket->DescriptorMap(); - delete $ref->{$fd}; - - # now stick the new one in - $self->{listener}->class_new_socket($sock); - return; - } - - # nope, let's see if we can continue the process - if ($! == EAGAIN) { - if ($SSL_ERROR == SSL_WANT_READ) { - $self->watch_read(1); - } elsif ($SSL_ERROR == SSL_WANT_WRITE) { - $self->watch_write(1); - } else { - $self->close('invalid_ssl_state'); - } - } else { - $self->close('invalid_ssl_error'); - } -} - -sub event_read { - $_[0]->watch_read(0); - $_[0]->try_accept; -} - -sub event_write { - $_[0]->watch_write(0); - $_[0]->try_accept; -} - -1; diff --git a/lib/mogdeps/Perlbal/TCPListener.pm b/lib/mogdeps/Perlbal/TCPListener.pm deleted file mode 100644 index b16b81f9..00000000 --- a/lib/mogdeps/Perlbal/TCPListener.pm +++ /dev/null @@ -1,193 +0,0 @@ -###################################################################### -# TCP listener on a given port -# -# Copyright 2004, Danga Interactive, Inc. -# Copyright 2005-2007, Six Apart, Ltd. 
- - -package Perlbal::TCPListener; -use strict; -use warnings; -no warnings qw(deprecated); - -use base "Perlbal::Socket"; -use fields ('service', - 'hostport', - 'sslopts', - 'v6', # bool: IPv6 libraries are available - ); -use Socket qw(IPPROTO_TCP SOL_SOCKET SO_SNDBUF); - -BEGIN { - eval { require Perlbal::SocketSSL }; - if (Perlbal::DEBUG > 0 && $@) { warn "SSL support failed on load: $@\n" } -} - -# TCPListener -sub new { - my Perlbal::TCPListener $self = shift; - my ($hostport, $service, $opts) = @_; - - $self = fields::new($self) unless ref $self; - $opts ||= {}; - - # Were ipv4 or ipv6 explicitly mentioned by syntax? - my $force_v4 = 0; - my $force_v6 = 0; - - my @args; - if ($hostport =~ /^\d+$/) { - @args = ('LocalPort' => $hostport); - } elsif ($hostport =~ /^\d+\.\d+\.\d+\.\d+:/) { - $force_v4 = 1; - @args = ('LocalAddr' => $hostport); - } - - my $v6_errors = ""; - - my $can_v6 = 0; - if (!$force_v4) { - eval "use Danga::Socket 1.61; 1; "; - if ($@) { - $v6_errors = "Danga::Socket 1.61 required for IPv6 support."; - } elsif (!eval { require IO::Socket::INET6; 1 }) { - $v6_errors = "IO::Socket::INET6 required for IPv6 support."; - } else { - $can_v6 = 1; - } - } - - my $socket_class = $can_v6 ? "IO::Socket::INET6" : "IO::Socket::INET"; - $self->{v6} = $can_v6; - - my $sock = $socket_class->new( - @args, - Proto => IPPROTO_TCP, - Listen => 1024, - ReuseAddr => 1, - ); - - return Perlbal::error("Error creating listening socket: " . ($@ || $!)) - unless $sock; - - if ($^O eq 'MSWin32') { - # On Windows, we have to do this a bit differently. - # IO::Socket should really do this for us, but whatever. - my $do = 1; - ioctl($sock, 0x8004667E, \$do) or return Perlbal::error("Unable to make listener on $hostport non-blocking: $!"); - } - else { - # IO::Socket::INET's Blocking => 0 just doesn't seem to work - # on lots of perls. who knows why. 
- IO::Handle::blocking($sock, 0) or return Perlbal::error("Unable to make listener on $hostport non-blocking: $!"); - } - - $self->SUPER::new($sock); - $self->{service} = $service; - $self->{hostport} = $hostport; - $self->{sslopts} = $opts->{ssl}; - $self->watch_read(1); - return $self; -} - -# TCPListener: accepts a new client connection -sub event_read { - my Perlbal::TCPListener $self = shift; - - # accept as many connections as we can - while (my ($psock, $peeraddr) = $self->{sock}->accept) { - IO::Handle::blocking($psock, 0); - - if (my $sndbuf = $self->{service}->{client_sndbuf_size}) { - my $rv = setsockopt($psock, SOL_SOCKET, SO_SNDBUF, pack("L", $sndbuf)); - } - - if (Perlbal::DEBUG >= 1) { - my ($pport, $pipr) = $self->{v6} ? - Socket6::unpack_sockaddr_in6($peeraddr) : - Socket::sockaddr_in($peeraddr); - my $pip = $self->{v6} ? - "[" . Socket6::inet_ntop(Socket6::AF_INET6(), $pipr) . "]" : - Socket::inet_ntoa($pipr); - print "Got new conn: $psock ($pip:$pport) for " . $self->{service}->role . "\n"; - } - - # SSL promotion if necessary - if ($self->{sslopts}) { - # try to upgrade to SSL, this does no IO it just re-blesses - # and prepares the SSL engine for handling us later - IO::Socket::SSL->start_SSL( - $psock, - SSL_server => 1, - SSL_startHandshake => 0, - %{ $self->{sslopts} }, - ); - print " .. 
socket upgraded to SSL!\n" if Perlbal::DEBUG >= 1; - - # safety checking to ensure we got upgraded - return $psock->close - unless ref $psock eq 'IO::Socket::SSL'; - - # class into new package and run with it - my $sslsock = new Perlbal::SocketSSL($psock, $self); - $sslsock->try_accept; - - # all done from our point of view - next; - } - - # puts this socket into the right class - $self->class_new_socket($psock); - } -} - -sub class_new_socket { - my Perlbal::TCPListener $self = shift; - my $psock = shift; - - my $service_role = $self->{service}->role; - if ($service_role eq "reverse_proxy") { - Perlbal::ClientProxy->new($self->{service}, $psock); - } elsif ($service_role eq "management") { - Perlbal::ClientManage->new($self->{service}, $psock); - } elsif ($service_role eq "web_server") { - Perlbal::ClientHTTP->new($self->{service}, $psock); - } elsif ($service_role eq "selector") { - # will be cast to a more specific class later... - Perlbal::ClientHTTPBase->new($self->{service}, $psock, $self->{service}); - } elsif (my $creator = Perlbal::Service::get_role_creator($service_role)) { - # was defined by a plugin, so we want to return one of these - $creator->($self->{service}, $psock); - } -} - -sub as_string { - my Perlbal::TCPListener $self = shift; - my $ret = $self->SUPER::as_string; - my Perlbal::Service $svc = $self->{service}; - $ret .= ": listening on $self->{hostport} for service '$svc->{name}'"; - return $ret; -} - -sub as_string_html { - my Perlbal::TCPListener $self = shift; - my $ret = $self->SUPER::as_string_html; - my Perlbal::Service $svc = $self->{service}; - $ret .= ": listening on $self->{hostport} for service $svc->{name}"; - return $ret; -} - -sub die_gracefully { - # die off so we stop waiting for new connections - my $self = shift; - $self->close('graceful_death'); -} - -1; - - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/Test.pm b/lib/mogdeps/Perlbal/Test.pm deleted 
file mode 100644 index e5bda231..00000000 --- a/lib/mogdeps/Perlbal/Test.pm +++ /dev/null @@ -1,401 +0,0 @@ -#!/usr/bin/perl -w - -package Perlbal::Test; - -=head1 NAME - -Perlbal::Test - Test harness for perlbal server - -=head1 SYNOPSIS - -# my $msock = Perlbal::Test::start_server(); - -=head1 DESCRIPTION - -Perlbal::Test provides access to a perlbal server running on the -local host, for testing purposes. - -The server can be an already-existing server, a child process, or -the current process. - -Various functions are provided to interact with the server. - -=head1 FUNCTIONS - -=cut - -use strict; -use POSIX qw( :sys_wait_h ); -use IO::Socket::INET; -use Socket qw(MSG_NOSIGNAL IPPROTO_TCP TCP_NODELAY SOL_SOCKET); -use HTTP::Response; - -require Exporter; -use vars qw(@ISA @EXPORT); -@ISA = qw(Exporter); -@EXPORT = qw(ua start_server foreach_aio manage filecontent tempdir new_port - manage_multi - mgmt_port wait_on_child dump_res resp_from_sock msock); - -our $i_am_parent = 0; -our $msock; # management sock of child -our $to_kill = 0; -our $mgmt_port; - -our $free_port = 60000; - -=head1 I - -Return the current management port number. - -=cut - -sub mgmt_port { - return $mgmt_port; -} - -END { - manage("shutdown") if $i_am_parent; -} - -=head1 I - -Return a readable string formatted from an HTTP::Response object. -Only the first 80 characters of returned content are returned. - -=cut - -sub dump_res { - my $res = shift; - my ($pkg, $filename, $line) = caller; - my $ret = "$filename:$line ==> "; - unless ($res) { - $ret .= "[response undefined]\n"; - return $ret; - } - my $ct = $res->content; - my $len = length $ct; - if ($len > 80) { - $ct = substr($ct, 0, 80) . "..."; - } - my $status = $res->status_line; - $status =~ s/[\r\n]//g; - return $ret . "status=[$status] content=$len" . "[$ct]\n"; -} - -=head1 I - -Return a newly created temporary directory. The directory will be -removed automatically upon program exit. 
- -=cut - -sub tempdir { - require File::Temp; - return File::Temp::tempdir( CLEANUP => 1 ); -} - -=head1 I - -Return the next free port number in the series. Port numbers are assigned -starting at 60000. - -=cut - -sub new_port { - test_port() ? return $free_port++ : return new_port($free_port++); -} - -=head1 I - -Return 1 if the port is free to use for listening on $free_port else return 0. - -=cut - -sub test_port { - my $sock = IO::Socket::INET->new(LocalPort => $free_port) or return 0; - $sock->close(); - return 1; -} - -=head1 I> - -Return a string containing the contents of the file $file. If $file -cannot be opened, then return undef. - -=cut - -sub filecontent { - my $file = shift; - my $ct; - open (F, $file) or return undef; - $ct = do { local $/; ; }; - close F; - return $ct; -} - -=head1 I - -Set the server into each AIO mode (none, ioaio) and call the specified -callback function with the mode name as argument. - -=cut - -sub foreach_aio (&) { - my $cb = shift; - - foreach my $mode (qw(none ioaio)) { - my $line = manage("SERVER aio_mode = $mode"); - next unless $line; - $cb->($mode); - } -} - -=head1 I - -Send a command $cmd to the server, and return the response line from -the server. - -Optional arguments are: - - quiet_failure => 1 - -Output a warning if the response indicated an error, -unless $opts{quiet_failure} is true, or the command -was 'shutdown' (which doesn't return a response). - -=cut - -sub manage { - my $cmd = shift; - my %opts = @_; - - print $msock "$cmd\r\n"; - my $res = <$msock>; - - if (!$res || $res =~ /^ERR/) { - # Make the result visible in failure cases, unless - # the command was 'shutdown'... cause that never - # returns anything. - warn "Manage command failed: '$cmd' '$res'\n" - unless($opts{quiet_failure} || $cmd eq 'shutdown'); - - return 0; - } - return $res; -} - -=head1 I - -Send a command $cmd to the server, and return a multi-line -response. Return the number zero if there was an error or -no response. 
- -=cut - -sub manage_multi { - my $cmd = shift; - - print $msock "$cmd\r\n"; - my $res; - while (<$msock>) { - last if /^\./; - last if /^ERROR/; - $res .= $_; - } - return 0 if !$res || $res =~ /^ERR/; - return $res; -} - -=head1 I - -Optionally start a perlbal server and return a socket connected to its -management port. - -The argument $conf is a string specifying initial configuration -commands. - -If the environment variable TEST_PERLBAL_FOREGROUND is set to a true -value then a server will be started in the foreground, in which case -this function does not return. When the server function finishes, -exit() will be called to terminate the process. - -If the environment variable TEST_PERLBAL_USE_EXISTING is set to a true -value then a socket will be returned which is connected to an existing -server's management port. - -Otherwise, a child process is forked and a socket is returned which is -connected to the child's management port. - -The management port is assigned automatically, a new port number each -time this function is called. The starting port number is 60000. - -=cut - -sub start_server { - my $conf = shift; - $mgmt_port = new_port(); - - if ($ENV{'TEST_PERLBAL_FOREGROUND'}) { - _start_perbal_server($conf, $mgmt_port); - } - - if ($ENV{'TEST_PERLBAL_USE_EXISTING'}) { - my $msock = wait_on_child(0, $mgmt_port); - return $msock; - } - - my $child = fork; - if ($child) { - $i_am_parent = 1; - $to_kill = $child; - my $msock = wait_on_child($child, $mgmt_port); - my $rv = waitpid($child, WNOHANG); - if ($rv) { - die "Child process (webserver) died.\n"; - } - print $msock "proc\r\n"; - my $spid = undef; - while (<$msock>) { - last if m!^\.\r?\n!; - next unless /^pid:\s+(\d+)/; - $spid = $1; - } - die "Our child was $child, but we connected and it says it's $spid." - unless $child == $spid; - - return $msock; - } - - # child process... 
- _start_perbal_server($conf, $mgmt_port); -} - -# Start a perlbal server running and tell it to listen on the specified -# management port number. This function does not return. - -sub _start_perbal_server { - my ($conf, $mgmt_port) = @_; - - require Perlbal; - - $conf .= qq{ -CREATE SERVICE mgmt -SET mgmt.listen = 127.0.0.1:$mgmt_port -SET mgmt.role = management -ENABLE mgmt -}; - - my $out = sub { print STDOUT "$_[0]\n"; }; - die "Configuration error" unless Perlbal::run_manage_commands($conf, $out); - - unless (Perlbal::Socket->WatchedSockets() > 0) { - die "Invalid configuration. (shouldn't happen?) Stopping (self=$$).\n"; - } - - Perlbal::run(); - exit 0; -} - - -=head1 I - -Return a reference to the socket connected to the server's management -port. - -=cut - -sub msock { - return $msock; -} - - -=head1 I - -Return a new instance of LWP::UserAgent. - -=cut - -sub ua { - require LWP; - require LWP::UserAgent; - return LWP::UserAgent->new; -} - -=head1 I - -Return a socket which is connected to a child process. - -$pid specifies the child process id, and $port is the port number on -which the child is listening. - -Several attempts are made; if the child dies or a connection cannot -be made within 5 seconds then this function dies with an error message. - -=cut - -sub wait_on_child { - my $pid = shift; - my $port = shift; - - my $start = time; - while (1) { - $msock = IO::Socket::INET->new(PeerAddr => "127.0.0.1:$port"); - return $msock if $msock; - select undef, undef, undef, 0.25; - if ($pid && waitpid($pid, WNOHANG) > 0) { - die "Child process (webserver) died.\n"; - } - die "Timeout waiting for port $port to startup" if time > $start + 5; - } -} - -=head1 I - -Read an HTTP response from a socket and return it -as an HTTP::Response object - -In scalar mode, return only the $http_response object. 
- -In array mode, return an array of ($http_response, $firstline) where -$firstline is the first line read from the socket, for example: - -"HTTP/1.1 200 OK" - -=cut - -sub resp_from_sock { - my $sock = shift; - - my $res = ""; - my $firstline = undef; - - while (<$sock>) { - $res .= $_; - $firstline ||= $_; - last if ! $_ || /^\r?\n/; - } - - unless ($firstline) { - print STDERR "Didn't get a firstline in HTTP response.\n"; - return undef; - } - - my $resp = HTTP::Response->parse($res); - return undef unless $resp; - - my $cl = $resp->header('Content-Length'); - if (defined $cl && $cl > 0) { - my $content = ''; - my $rv; - while (($rv = read($sock, $content, $cl)) && - ($cl -= $rv) > 0) { - # don't do anything, the loop is it - } - $resp->content($content); - } - - return wantarray ? ($resp, $firstline) : $resp; -} - -1; diff --git a/lib/mogdeps/Perlbal/Test/WebClient.pm b/lib/mogdeps/Perlbal/Test/WebClient.pm deleted file mode 100644 index 71edbbd4..00000000 --- a/lib/mogdeps/Perlbal/Test/WebClient.pm +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl - -package Perlbal::Test::WebClient; - -use strict; -use IO::Socket::INET; -use Perlbal::Test; -use HTTP::Response; -use Socket qw(MSG_NOSIGNAL IPPROTO_TCP TCP_NODELAY SOL_SOCKET); - -require Exporter; -use vars qw(@ISA @EXPORT $FLAG_NOSIGNAL); -@ISA = qw(Exporter); -@EXPORT = qw(new); - -$FLAG_NOSIGNAL = 0; -eval { $FLAG_NOSIGNAL = MSG_NOSIGNAL; }; - -# create a blank object -sub new { - my $class = shift; - my $self = {}; - bless $self, $class; - return $self; -} - -# get/set what server we should be testing; "ip:port" generally -sub server { - my $self = shift; - if (@_) { - $self->{_sock} = undef; - return $self->{server} = shift; - } else { - return $self->{server}; - } -} - -# get/set what hostname we send with requests -sub host { - my $self = shift; - if (@_) { - $self->{_sock} = undef; - return $self->{host} = shift; - } else { - return $self->{host}; - } -} - -# set which HTTP version to emulate; specify 
'1.0' or '1.1' -sub http_version { - my $self = shift; - if (@_) { - return $self->{http_version} = shift; - } else { - return $self->{http_version}; - } -} - -# set on or off to enable or disable persistent connection -sub keepalive { - my $self = shift; - if (@_) { - $self->{keepalive} = shift() ? 1 : 0; - } - return $self->{keepalive}; -} - -# construct and send a request -sub request { - my $self = shift; - return undef unless $self->{server}; - - my $opts = ref $_[0] eq "HASH" ? shift : {}; - my $opt_headers = delete $opts->{'headers'}; - my $opt_host = delete $opts->{'host'}; - my $opt_method = delete $opts->{'method'}; - my $opt_content = delete $opts->{'content'}; - my $opt_extra_rn = delete $opts->{'extra_rn'}; - my $opt_return_reader = delete $opts->{'return_reader'}; - my $opt_post_header_pause = delete $opts->{'post_header_pause'}; - die "Bogus options: " . join(", ", keys %$opts) if %$opts; - - my $cmds = join(',', map { eurl($_) } @_); - return undef unless $cmds; - - # keep-alive header if 1.0, also means add content-length header - my $headers = ''; - if ($self->{keepalive}) { - $headers .= "Connection: keep-alive\r\n"; - } else { - $headers .= "Connection: close\r\n"; - } - - if ($opt_headers) { - $headers .= $opt_headers; - } - - if (my $hostname = $opt_host || $self->{host}) { - $headers .= "Host: $hostname\r\n"; - } - my $method = $opt_method || "GET"; - my $body = ""; - - if ($opt_content) { - $headers .= "Content-Length: " . length($opt_content) . 
"\r\n"; - $body = $opt_content; - } - - if ($opt_extra_rn) { - $body .= "\r\n"; # some browsers on POST send an extra \r\n that's not part of content-length - } - - my $send = "$method /$cmds HTTP/$self->{http_version}\r\n$headers\r\n"; - - unless ($opt_post_header_pause) { - $send .= $body; - } - - my $len = length $send; - - # send setup - my $rv; - my $sock = delete $self->{_sock}; - local $SIG{'PIPE'} = "IGNORE" unless $FLAG_NOSIGNAL; - - ### send it cached - if ($sock) { - $rv = send($sock, $send, $FLAG_NOSIGNAL); - if ($! || ! defined $rv) { - undef $self->{_sock}; - } elsif ($rv != $len) { - return undef; - } - } - - # failing that, send it through a new socket - unless ($rv) { - $self->{_reqdone} = 0; - - $sock = IO::Socket::INET->new( - PeerAddr => $self->{server}, - Timeout => 3, - ) or return undef; - setsockopt($sock, IPPROTO_TCP, TCP_NODELAY, pack("l", 1)) or die "failed to set sockopt: $!\n"; - - $rv = send($sock, $send, $FLAG_NOSIGNAL); - if ($! || $rv != $len) { - return undef; - } - } - - if ($opt_post_header_pause) { - select undef, undef, undef, $opt_post_header_pause; - my $len = length $body; - if ($len) { - my $rv = send($sock, $body, $FLAG_NOSIGNAL); - if ($! || ! defined $rv) { - undef $self->{_sock}; - } elsif ($rv != $len) { - return undef; - } - } - } - - my $parse_it = sub { - my ($resp, $firstline) = resp_from_sock($sock); - - my $conhdr = $resp->header("Connection") || ""; - if (($firstline =~ m!\bHTTP/1\.1\b! && $conhdr !~ m!\bclose\b!i) || - ($firstline =~ m!\bHTTP/1\.0\b! 
&& $conhdr =~ m!\bkeep-alive\b!i)) { - $self->{_sock} = $sock; - $self->{_reqdone}++; - } else { - $self->{_reqdone} = 0; - } - - return $resp; - }; - - if ($opt_return_reader) { - return $parse_it; - } else { - return $parse_it->(); - } -} - -sub reqdone { - my $self = shift; - return $self->{_reqdone}; -} - -# general purpose URL escaping function -sub eurl { - my $a = $_[0]; - $a =~ s/([^a-zA-Z0-9_\,\-.\/\\\: ])/uc sprintf("%%%02x",ord($1))/eg; - $a =~ tr/ /+/; - return $a; -} - -1; diff --git a/lib/mogdeps/Perlbal/Test/WebServer.pm b/lib/mogdeps/Perlbal/Test/WebServer.pm deleted file mode 100644 index 5d154482..00000000 --- a/lib/mogdeps/Perlbal/Test/WebServer.pm +++ /dev/null @@ -1,264 +0,0 @@ -#!/usr/bin/perl - -package Perlbal::Test::WebServer; - -use strict; -use IO::Socket::INET; -use HTTP::Request; -use Socket qw(MSG_NOSIGNAL IPPROTO_TCP TCP_NODELAY SOL_SOCKET); -use Perlbal::Test; - -use Perlbal::Test::WebClient; - -require Exporter; -use vars qw(@ISA @EXPORT); -@ISA = qw(Exporter); -@EXPORT = qw(start_webserver); - -our @webserver_pids; - -my $testpid; # of the test suite's main program, the one running the HTTP client - -END { - # ensure we kill off the webserver - kill 9, @webserver_pids if $testpid && $testpid == $$; -} - - -sub start_webserver { - my $port = new_port(); - - # dummy mode - if ($ENV{'TEST_PERLBAL_USE_EXISTING'}) { - return $port; - } - - $testpid = $$; - - if (my $child = fork) { - # i am parent, wait for child to startup - push @webserver_pids, $child; - my $sock = wait_on_child($child, $port); - die "Unable to spawn webserver on port $port\n" - unless $sock; - print $sock "GET /reqdecr,status HTTP/1.0\r\n\r\n"; - my $line = <$sock>; - die "Didn't get 200 OK: " . (defined $line ? 
$line : "(undef)") - unless $line && $line =~ /200 OK/; - return $port; - } - - # i am child, start up - my $ssock = IO::Socket::INET->new(LocalPort => $port, ReuseAddr => 1, Listen => 3) - or die "Unable to start socket: $!\n"; - while (my $csock = $ssock->accept) { - exit 0 unless $csock; - fork and next; # parent starts waiting for next request - setsockopt($csock, IPPROTO_TCP, TCP_NODELAY, pack("l", 1)) or die; - serve_client($csock); - } -} - -sub serve_client { - my $csock = shift; - my $req_num = 0; - my $did_options = 0; - my @reqs; - - REQ: - while (1) { - my $req = ''; - my $clen = undef; - while (<$csock>) { - $req .= $_; - if (/^content-length:\s*(\d+)/i) { $clen = $1; }; - last if ! $_ || /^\r?\n/; - } - exit 0 unless $req; - - # parse out things we want to have - my @cmds; - my $httpver = 0; # 0 = 1.0, 1 = 1.1, undef = neither - my $method; - if ($req =~ m!^([A-Z]+) /?(\S+) HTTP/(1\.\d+)\r?\n?!) { - $method = $1; - my $cmds = durl($2); - @cmds = split(/\s*,\s*/, $cmds); - $req_num++; - $httpver = ($3 eq '1.0' ? 0 : ($3 eq '1.1' ? 1 : undef)); - } - my $msg = HTTP::Request->parse($req); - my $keeping_alive = undef; - - my $body; - if ($clen) { - die "Can't read a body on a GET or HEAD" if $method =~ /^GET|HEAD$/; - my $read = read $csock, $body, $clen; - die "Didn't read $clen bytes. Got $read." if $clen != $read; - } - - my $response = sub { - my %opts = @_; - my $code = delete $opts{code}; - my $codetext = delete $opts{codetext}; - my $content = delete $opts{content}; - my $ctype = delete $opts{type}; - my $extra_hdr = delete $opts{headers}; - die "unknown data in opts: %opts" if %opts; - - $extra_hdr ||= ''; - $code ||= $content ? 
200 : 200; - $codetext ||= { 200 => 'OK', 500 => 'Internal Server Error', 204 => "No Content" }->{$code}; - $content ||= ""; - - my $clen = length $content; - $ctype ||= "text/plain" unless $code == 204; - $extra_hdr .= "Content-Type: $ctype\r\n" if $ctype; - - my $hdr_keepalive = ""; - - unless (defined $keeping_alive) { - my $hdr_connection = $msg->header('Connection') || ''; - if ($httpver == 1) { - if ($hdr_connection =~ /\bclose\b/i) { - $keeping_alive = 0; - } else { - $keeping_alive = "1.1implicit"; - } - } - if ($httpver == 0 && $hdr_connection =~ /\bkeep-alive\b/i) { - $keeping_alive = "1.0keepalive"; - } - } - - if ($keeping_alive) { - $hdr_keepalive = "Connection: keep-alive\n"; - } else { - $hdr_keepalive = "Connection: close\n"; - } - - return "HTTP/1.0 $code $codetext\r\n" . - $hdr_keepalive . - "Content-Length: $clen\r\n" . - $extra_hdr . - "\r\n" . - "$content"; - }; - - my $send = sub { - my $res = shift; - print $csock $res; - exit 0 unless $keeping_alive; - }; - - # 500 if no commands were given or we don't know their HTTP version - # or we didn't parse a proper HTTP request - unless (@cmds && defined $httpver && $msg) { - print STDERR "500 response!\n"; - $send->($response->(code => 500)); - next REQ; - } - - if ($method eq "OPTIONS") { - $did_options = 1; - $send->($response->(code => 200)); - next REQ; - } - - # prepare a simple 200 to send; undef this if you want to control - # your own output below - my $to_send; - - foreach my $cmd (@cmds) { - $cmd =~ s/^\s+//; - $cmd =~ s/\s+$//; - - if ($cmd =~ /^sleep:([\d\.]+)$/i) { - my $sleeptime = $1; - #print "I, $$, should sleep for $sleeptime.\n"; - use Time::HiRes; - my $t1 = Time::HiRes::time(); - select undef, undef, undef, $1; - my $t2 = Time::HiRes::time(); - my $td = $t2 - $t1; - #print "I, $$, slept for $td\n"; - } - - if ($cmd =~ /^keepalive:([01])$/i) { - $keeping_alive = $1; - } - - if ($cmd eq "status") { - my $len = $clen || 0; - my $bu = 
$msg->header('X-PERLBAL-BUFFERED-UPLOAD-REASON') || ''; - $to_send = $response->(content => - "pid = $$\nreqnum = $req_num\nmethod = $method\n". - "length = $len\nbuffered = $bu\noptions = $did_options\n"); - } - - if ($cmd eq "reqdecr") { - $req_num--; - } - - if ($cmd =~ /^kill:(\d+):(\w+)$/) { - kill $2, $1; - } - - if ($cmd =~ /^reproxy_url:(.+)/i) { - $to_send = $response->(headers => "X-Reproxy-URL: $1\r\n", - code => 204, - ); - } - - if ($cmd =~ /^reproxy_url204:(.+)/i) { - $to_send = $response->(headers => "X-Reproxy-URL: $1\r\n"); - } - - if ($cmd =~ /^reproxy_url_cached:(\d+):(.+)/i) { - kill 'USR1', $testpid; - $to_send = $response->(headers => - "X-Reproxy-URL: $2\r\nX-Reproxy-Cache-For: $1; Last-Modified Content-Type\r\n"); - } - - if ($cmd =~ /^reproxy_url_multi:((?:\d+:){2,})(\S+)/i) { - my $ports = $1; - my $path = $2; - my @urls; - foreach my $port (split(/:/, $ports)) { - push @urls, "http://127.0.0.1:$port$path"; - } - $to_send = $response->(headers => "X-Reproxy-URL: @urls\r\n"); - } - - if ($cmd =~ /^reproxy_file:(.+)/i) { - $to_send = $response->(headers => "X-Reproxy-File: $1\r\n"); - } - - if ($cmd =~ /^subreq:(\d+)$/) { - my $port = $1; - my $wc = Perlbal::Test::WebClient->new; - $wc->server("127.0.0.1:$port"); - $wc->keepalive(0); - $wc->http_version('1.0'); - my $resp = $wc->request("status"); - my $subpid; - if ($resp && $resp->content =~ /^pid = (\d+)$/m) { - $subpid = $1; - } - $to_send = $response->(content => "pid = $$\nsubpid = $subpid\nreqnum = $req_num\n"); - } - } - - $send->($to_send || $response->()); - } # while(1) -} - -# de-url escape -sub durl { - my ($a) = @_; - $a =~ tr/+/ /; - $a =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; - return $a; -} - -1; diff --git a/lib/mogdeps/Perlbal/UploadListener.pm b/lib/mogdeps/Perlbal/UploadListener.pm deleted file mode 100644 index 062f7ef6..00000000 --- a/lib/mogdeps/Perlbal/UploadListener.pm +++ /dev/null @@ -1,107 +0,0 @@ 
-###################################################################### -# Listen for UDP upload status packets -# -# Copyright 2005-2007, Six Apart, Ltd. - - -package Perlbal::UploadListener; -use strict; -use warnings; -no warnings qw(deprecated); - -use base "Perlbal::Socket"; -use fields qw(service hostport); - -# TCPListener -sub new { - my ($class, $hostport, $service) = @_; - - my $sock = - IO::Socket::INET->new( - LocalAddr => $hostport, - Proto => "udp", - ReuseAddr => 1, - Blocking => 0, - ); - - return Perlbal::error("Error creating listening socket: " . ($@ || $!)) - unless $sock; - my $self = fields::new($class); - $self->SUPER::new($sock); - $self->{service} = $service; - $self->{hostport} = $hostport; - $self->watch_read(1); - return $self; -} - -my %status; -my @todelete; - -sub get_status { - my $ses = shift; - return $status{$ses}; -} - -# TCPListener: accepts a new client connection -sub event_read { - my Perlbal::TCPListener $self = shift; - - my $buf; - $self->{sock}->recv($buf, 500); - return unless $buf =~ /^UPLOAD:(\w{5,50}):(\d+):(\d+):(\d+):(\d+)$/; - my ($ses, $done, $total, $starttime, $nowtime) = ($1, $2, $3, $4, $5); - - my $now = time(); - - $status{$ses} = { - done => $done, - total => $total, - starttime => $starttime, - lasttouch => $now, - }; - - # keep a history of touched records, then we'll clean 'em - # after 30 seconds. 
- push @todelete, [$now, $ses]; - my $too_old = $now - 4; - while (@todelete && $todelete[0][0] < $too_old) { - my $rec = shift @todelete; - my $to_kill = $rec->[1]; - if (my $krec = $status{$to_kill}) { - my $last_touch = $krec->{lasttouch}; - delete $status{$to_kill} if $last_touch < $too_old; - } - } -} - -sub as_string { - my Perlbal::TCPListener $self = shift; - my $ret = $self->SUPER::as_string; - my Perlbal::Service $svc = $self->{service}; - $ret .= ": listening on $self->{hostport} for service '$svc->{name}'"; - return $ret; -} - -sub as_string_html { - my Perlbal::TCPListener $self = shift; - my $ret = $self->SUPER::as_string_html; - my Perlbal::Service $svc = $self->{service}; - $ret .= ": listening on $self->{hostport} for service $svc->{name}"; - return $ret; -} - -sub die_gracefully { - # die off so we stop waiting for new connections - my $self = shift; - $self->close('graceful_death'); -} - - -1; - - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Perlbal/Util.pm b/lib/mogdeps/Perlbal/Util.pm deleted file mode 100644 index 17efc599..00000000 --- a/lib/mogdeps/Perlbal/Util.pm +++ /dev/null @@ -1,53 +0,0 @@ -# misc util functions -# - -package Perlbal::Util; -use strict; -use warnings; -no warnings qw(deprecated); - -sub durl { - my ($txt) = @_; - $txt =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; - return $txt; -} - -=head2 C< rebless > - -Safely re-bless a locked (use fields) hash into another package. Note -that for our convenience elsewhere the set of allowable keys for the -re-blessed hash will be the union of the keys allowed by its old package -and those allowed for the package into which it is blessed. 
- -=cut - -BEGIN { - if ($] >= 5.010) { - eval q{ - use Hash::Util qw(legal_ref_keys unlock_ref_keys lock_ref_keys) - }; - *rebless = sub { - my ($obj, $pkg) = @_; - my @keys = legal_ref_keys($obj); - unlock_ref_keys($obj); - bless $obj, $pkg; - lock_ref_keys($obj, @keys, - legal_ref_keys(fields::new($pkg))); - return $obj; - }; - } - else { - *rebless = sub { - my ($obj, $pkg) = @_; - return bless $obj, $pkg; - }; - } -} - -1; - -# Local Variables: -# mode: perl -# c-basic-indent: 4 -# indent-tabs-mode: nil -# End: diff --git a/lib/mogdeps/Sys/Syscall.pm b/lib/mogdeps/Sys/Syscall.pm deleted file mode 100644 index 25272587..00000000 --- a/lib/mogdeps/Sys/Syscall.pm +++ /dev/null @@ -1,335 +0,0 @@ -# LICENSE: You're free to distribute this under the same terms as Perl itself. - -package Sys::Syscall; -use strict; -use POSIX qw(ENOSYS SEEK_CUR); -use Config; - -require Exporter; -use vars qw(@ISA @EXPORT_OK %EXPORT_TAGS $VERSION); - -$VERSION = "0.22"; -@ISA = qw(Exporter); -@EXPORT_OK = qw(sendfile epoll_ctl epoll_create epoll_wait - EPOLLIN EPOLLOUT EPOLLERR EPOLLHUP EPOLLRDBAND - EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD); -%EXPORT_TAGS = (epoll => [qw(epoll_ctl epoll_create epoll_wait - EPOLLIN EPOLLOUT EPOLLERR EPOLLHUP EPOLLRDBAND - EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD)], - sendfile => [qw(sendfile)], - ); - -use constant EPOLLIN => 1; -use constant EPOLLOUT => 4; -use constant EPOLLERR => 8; -use constant EPOLLHUP => 16; -use constant EPOLLRDBAND => 128; -use constant EPOLL_CTL_ADD => 1; -use constant EPOLL_CTL_DEL => 2; -use constant EPOLL_CTL_MOD => 3; - -our $loaded_syscall = 0; - -sub _load_syscall { - # props to Gaal for this! 
- return if $loaded_syscall++; - my $clean = sub { - delete @INC{qw}; - }; - $clean->(); # don't trust modules before us - my $rv = eval { require 'syscall.ph'; 1 } || eval { require 'sys/syscall.ph'; 1 }; - $clean->(); # don't require modules after us trust us - return $rv; -} - -our ($sysname, $nodename, $release, $version, $machine) = POSIX::uname(); - -our ( - $SYS_epoll_create, - $SYS_epoll_ctl, - $SYS_epoll_wait, - $SYS_sendfile, - $SYS_readahead, - ); - -if ($^O eq "linux") { - # whether the machine requires 64-bit numbers to be on 8-byte - # boundaries. - my $u64_mod_8 = 0; - - # if we're running on an x86_64 kernel, but a 32-bit process, - # we need to use the i386 syscall numbers. - if ($machine eq "x86_64" && $Config{ptrsize} == 4) { - $machine = "i386"; - } - - if ($machine =~ m/^i[3456]86$/) { - $SYS_epoll_create = 254; - $SYS_epoll_ctl = 255; - $SYS_epoll_wait = 256; - $SYS_sendfile = 187; # or 64: 239 - $SYS_readahead = 225; - } elsif ($machine eq "x86_64") { - $SYS_epoll_create = 213; - $SYS_epoll_ctl = 233; - $SYS_epoll_wait = 232; - $SYS_sendfile = 40; - $SYS_readahead = 187; - } elsif ($machine eq "ppc64") { - $SYS_epoll_create = 236; - $SYS_epoll_ctl = 237; - $SYS_epoll_wait = 238; - $SYS_sendfile = 186; # (sys32_sendfile). 
sys32_sendfile64=226 (64 bit processes: sys_sendfile64=186) - $SYS_readahead = 191; # both 32-bit and 64-bit vesions - $u64_mod_8 = 1; - } elsif ($machine eq "ppc") { - $SYS_epoll_create = 236; - $SYS_epoll_ctl = 237; - $SYS_epoll_wait = 238; - $SYS_sendfile = 186; # sys_sendfile64=226 - $SYS_readahead = 191; - $u64_mod_8 = 1; - } elsif ($machine eq "ia64") { - $SYS_epoll_create = 1243; - $SYS_epoll_ctl = 1244; - $SYS_epoll_wait = 1245; - $SYS_sendfile = 1187; - $SYS_readahead = 1216; - $u64_mod_8 = 1; - } elsif ($machine eq "alpha") { - # natural alignment, ints are 32-bits - $SYS_sendfile = 370; # (sys_sendfile64) - $SYS_epoll_create = 407; - $SYS_epoll_ctl = 408; - $SYS_epoll_wait = 409; - $SYS_readahead = 379; - $u64_mod_8 = 1; - } else { - # as a last resort, try using the *.ph files which may not - # exist or may be wrong - _load_syscall(); - $SYS_epoll_create = eval { &SYS_epoll_create; } || 0; - $SYS_epoll_ctl = eval { &SYS_epoll_ctl; } || 0; - $SYS_epoll_wait = eval { &SYS_epoll_wait; } || 0; - $SYS_readahead = eval { &SYS_readahead; } || 0; - } - - if ($u64_mod_8) { - *epoll_wait = \&epoll_wait_mod8; - *epoll_ctl = \&epoll_ctl_mod8; - } else { - *epoll_wait = \&epoll_wait_mod4; - *epoll_ctl = \&epoll_ctl_mod4; - } -} - -elsif ($^O eq "freebsd") { - if ($ENV{FREEBSD_SENDFILE}) { - # this is still buggy and in development - $SYS_sendfile = 393; # old is 336 - } -} - -############################################################################ -# sendfile functions -############################################################################ - -unless ($SYS_sendfile) { - _load_syscall(); - $SYS_sendfile = eval { &SYS_sendfile; } || 0; -} - -sub sendfile_defined { return $SYS_sendfile ? 1 : 0; } - -if ($^O eq "linux" && $SYS_sendfile) { - *sendfile = \&sendfile_linux; -} elsif ($^O eq "freebsd" && $SYS_sendfile) { - *sendfile = \&sendfile_freebsd; -} else { - *sendfile = \&sendfile_noimpl; -} - -sub sendfile_noimpl { - $! 
= ENOSYS; - return -1; -} - -# C: ssize_t sendfile(int out_fd, int in_fd, off_t *offset, size_t count) -# Perl: sendfile($write_fd, $read_fd, $max_count) --> $actually_sent -sub sendfile_linux { - return syscall( - $SYS_sendfile, - $_[0] + 0, # fd - $_[1] + 0, # fd - 0, # don't keep track of offset. callers can lseek and keep track. - $_[2] + 0 # count - ); -} - -sub sendfile_freebsd { - my $offset = POSIX::lseek($_[1]+0, 0, SEEK_CUR) + 0; - my $ct = $_[2] + 0; - my $sbytes_buf = "\0" x 8; - my $rv = syscall( - $SYS_sendfile, - $_[1] + 0, # fd (from) - $_[0] + 0, # socket (to) - $offset, - $ct, - 0, # struct sf_hdtr *hdtr - $sbytes_buf, # off_t *sbytes - 0); # flags - return $rv if $rv < 0; - - - my $set = unpack("L", $sbytes_buf); - POSIX::lseek($_[1]+0, SEEK_CUR, $set); - return $set; -} - - -############################################################################ -# epoll functions -############################################################################ - -sub epoll_defined { return $SYS_epoll_create ? 
1 : 0; } - -# ARGS: (size) -- but in modern Linux 2.6, the -# size doesn't even matter (radix tree now, not hash) -sub epoll_create { - return -1 unless defined $SYS_epoll_create; - my $epfd = eval { syscall($SYS_epoll_create, ($_[0]||100)+0) }; - return -1 if $@; - return $epfd; -} - -# epoll_ctl wrapper -# ARGS: (epfd, op, fd, events_mask) -sub epoll_ctl_mod4 { - syscall($SYS_epoll_ctl, $_[0]+0, $_[1]+0, $_[2]+0, pack("LLL", $_[3], $_[2], 0)); -} -sub epoll_ctl_mod8 { - syscall($SYS_epoll_ctl, $_[0]+0, $_[1]+0, $_[2]+0, pack("LLLL", $_[3], 0, $_[2], 0)); -} - -# epoll_wait wrapper -# ARGS: (epfd, maxevents, timeout (milliseconds), arrayref) -# arrayref: values modified to be [$fd, $event] -our $epoll_wait_events; -our $epoll_wait_size = 0; -sub epoll_wait_mod4 { - # resize our static buffer if requested size is bigger than we've ever done - if ($_[1] > $epoll_wait_size) { - $epoll_wait_size = $_[1]; - $epoll_wait_events = "\0" x 12 x $epoll_wait_size; - } - my $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0); - for (0..$ct-1) { - @{$_[3]->[$_]}[1,0] = unpack("LL", substr($epoll_wait_events, 12*$_, 8)); - } - return $ct; -} - -sub epoll_wait_mod8 { - # resize our static buffer if requested size is bigger than we've ever done - if ($_[1] > $epoll_wait_size) { - $epoll_wait_size = $_[1]; - $epoll_wait_events = "\0" x 16 x $epoll_wait_size; - } - my $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0); - for (0..$ct-1) { - # 16 byte epoll_event structs, with format: - # 4 byte mask [idx 1] - # 4 byte padding (we put it into idx 2, useless) - # 8 byte data (first 4 bytes are fd, into idx 0) - @{$_[3]->[$_]}[1,2,0] = unpack("LLL", substr($epoll_wait_events, 16*$_, 12)); - } - return $ct; -} - - -1; -__END__ - -=head1 NAME - -Sys::Syscall - access system calls that Perl doesn't normally provide access to - -=head1 SYNOPSIS - - use Sys::Syscall; - -=head1 DESCRIPTION - -Use epoll, sendfile, from Perl. 
Mostly Linux-only support now, but -more syscalls/OSes planned for future. - -=head1 Exports - -Nothing by default. - -May export: sendfile epoll_ctl epoll_create epoll_wait EPOLLIN EPOLLOUT EPOLLERR EPOLLHUP EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD - -Export tags: :epoll and :sendfile - -=head1 Functions - -=head2 epoll support - -=over 4 - -=item $ok = epoll_defined() - -Returns true if epoll might be available. (caller must still test with epoll_create) - -=item $epfd = epoll_create([ $start_size ]) - -Create a new epoll filedescriptor. Returns -1 if epoll isn't available. - -=item $rv = epoll_ctl($epfd, $op, $fd, $events) - -See manpage for epoll_ctl - -=item $count = epoll_wait($epfd, $max_events, $timeout, $arrayref) - -See manpage for epoll_wait. $arrayref is an arrayref to be modified -with the items returned. The values put into $arrayref are arrayrefs -of [$fd, $state]. - -=back - -=head2 sendfile support - -=over 4 - -=item $ok = sendfile_defined() - -Returns true if sendfile should work on this operating system. - -=item $sent = sendfile($sock_fd, $file_fd, $max_send) - -Sends up to $max_send bytes from $file_fd to $sock_fd. Returns bytes -actually sent, or -1 on error. - -=back - -=head1 COPYRIGHT - -This module is Copyright (c) 2005, 2006 Six Apart, Ltd. - -All rights reserved. - -You may distribute under the terms of either the GNU General Public -License or the Artistic License, as specified in the Perl README file. -If you need more liberal licensing terms, please contact the -maintainer. - -=head1 WARRANTY - -This is free software. IT COMES WITHOUT WARRANTY OF ANY KIND. - -=head1 AUTHORS - -Brad Fitzpatrick - diff --git a/mogilefsd b/mogilefsd index 5301a29e..8d00d344 100755 --- a/mogilefsd +++ b/mogilefsd @@ -73,9 +73,9 @@ See L. See L. -=item B -- background filesystem consistency checker +=item B -- background filesystem consistency checker -See L. +See L. 
=back diff --git a/mogstored b/mogstored index 82cbf127..38d911c1 100755 --- a/mogstored +++ b/mogstored @@ -14,25 +14,6 @@ use strict; use lib 'lib'; use Mogstored::HTTPServer; -# based on where we found Mogstored::HTTPServer (a pure-perl module), -# add the mogdeps/ subdirectory of that base to our @INC search -# path, where all the misc Mogile dependencies are installed. -BEGIN { - my $libpath; - if (! $ENV{MOGILE_NO_BUILTIN_DEPS} && - ($libpath = $INC{"Mogstored/HTTPServer.pm"}) && - $libpath =~ s!Mogstored/HTTPServer.pm$!!) - { - my $dep_dir = "${libpath}mogdeps"; - push @INC, $dep_dir; - unless (($ENV{PERL5LIB} || "") =~ /$dep_dir/) { - $ENV{PERL5LIB} = join(":", - split(/:/, $ENV{PERL5LIB} || ""), - $dep_dir); - } - } -} - use IO::Socket::INET; use POSIX qw(WNOHANG); use Perlbal 1.73; @@ -42,6 +23,7 @@ use Mogstored::HTTPServer::Perlbal; use Mogstored::HTTPServer::Lighttpd; use Mogstored::HTTPServer::None; use Mogstored::HTTPServer::Apache; +use Mogstored::HTTPServer::Nginx; use Mogstored::SideChannelListener; use Mogstored::SideChannelClient; @@ -56,6 +38,7 @@ my $iostat_available = 1; # bool: iostat working. assume working to start. my ($iostat_pipe_r, $iostat_pipe_w); # pipes for talking to iostat process # Config: +my $opt_skipconfig; my $opt_daemonize; my $opt_config; my $opt_iostat = 1; # default to on now @@ -73,6 +56,7 @@ $0 = "mogstored"; my %config_opts = ( 'iostat' => \$opt_iostat, + 's|skipconfig' => \$opt_skipconfig, 'daemonize|d' => \$opt_daemonize, 'config=s' => \$opt_config, 'httplisten=s' => \$http_listen, @@ -85,11 +69,11 @@ my %config_opts = ( ); usage() unless Getopt::Long::GetOptions(%config_opts); -die "Unknown server type. Valid options: --server={perlbal,lighttpd,apache,none}" - unless $server =~ /^perlbal|lighttpd|apache|none$/; +die "Unknown server type. Valid options: --server={perlbal,lighttpd,apache,nginx,none}" + unless $server =~ /^perlbal|lighttpd|apache|nginx|none$/; $opt_config = $default_config if ! 
$opt_config && -e $default_config; -load_config_file($opt_config => \%config_opts) if $opt_config; +load_config_file($opt_config => \%config_opts) if $opt_config && !$opt_skipconfig; # initialize basic required Perlbal machinery, for any HTTP server my $perlbal_init = qq{ @@ -113,7 +97,13 @@ my $httpsrv = $httpsrv_class->new( maxconns => $max_conns, bin => $serverbin, ); -$httpsrv->start; + +# Configure Perlbal HTTP listener after daemonization since it can create a +# kqueue on *BSD. kqueue descriptors are automatically invalidated on fork(), +# making them unusable after daemonize. For non-Perlbal, starting the +# server before daemonization improves error reporting as daemonization +# redirects stdout/stderr to /dev/null. +$httpsrv->start if $server ne "perlbal"; if ($opt_daemonize) { $httpsrv->pre_daemonize; @@ -122,6 +112,9 @@ if ($opt_daemonize) { print "Running.\n"; } +# It is now safe for Perlbal to create a kqueue +$httpsrv->start if $server eq "perlbal"; + $httpsrv->post_daemonize; # kill our children processes on exit: diff --git a/t/00-startup.t b/t/00-startup.t index 2942157d..666cdad6 100644 --- a/t/00-startup.t +++ b/t/00-startup.t @@ -9,6 +9,10 @@ use MogileFS::Server; use MogileFS::Util qw(error_code); use MogileFS::Test; +BEGIN { + $ENV{TESTING} = 1; +} + find_mogclient_or_skip(); # use mogadm to init it, @@ -19,9 +23,7 @@ find_mogclient_or_skip(); # etc my $sto = eval { temp_store(); }; -if ($sto) { - plan tests => 70; -} else { +if (!$sto) { plan skip_all => "Can't create temporary test database: $@"; exit 0; } @@ -80,11 +82,61 @@ ok($be->do_request("test", {}), "test ping worked"); ok(!$be->do_request("test", {crash => 1}), "crash didn't"); ok($be->do_request("test", {}), "test ping again worked"); +{ + my $c = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001', Timeout => 3); + ok(want($c, 1, "queryworker"), "set 1 queryworker"); + + my $expect = "ERR no_domain No+domain+provided\r\n" x 2; + + # bad domain won't return twice + my $cmd = 
"list_keys domain=\r\n"; + $c->syswrite($cmd x 2); + my $r; + my $resp = ""; + do { + $r = $c->sysread(my $buf, 500); + if (defined $r && $r > 0) { + $resp .= $buf; + } + } while ($r && length($resp) != length($expect)); + is($resp, $expect, "response matches expected"); + + ok(want($c, 2, "queryworker"), "restored 2 queryworkers"); +} ok($tmptrack->mogadm("domain", "add", "todie"), "created todie domain"); ok($tmptrack->mogadm("domain", "delete", "todie"), "delete todie domain"); ok(!$tmptrack->mogadm("domain", "delete", "todie"), "didn't delete todie domain again"); +# ensure "default" class is removed when its domain is removed +{ + use Data::Dumper; + my $before = Dumper($sto->get_all_classes); + ok($tmptrack->mogadm("domain", "add", "def"), "created def domain"); + + my $dmid = $sto->get_domainid_by_name("def"); + ok(defined($dmid), "def dmid retrieved"); + + isnt($sto->domain_has_classes($dmid), "domain_has_classes does not show default class"); + ok($tmptrack->mogadm("class", "modify", "def", "default", "--mindevcount=3"), "modified default to have mindevcount=3"); + + my $classid = $sto->get_classid_by_name($dmid, "default"); + is($classid, 0, "default class has classid=0"); + isnt($sto->domain_has_classes($dmid), "domain_has_classes does not show default class"); + ok($tmptrack->mogadm("domain", "delete", "def"), "remove def domain"); + is($sto->get_domainid_by_name("def"), undef, "def nonexistent"); + is($sto->get_classid_by_name($dmid, "default"), undef, "def/default class nonexistent"); + + my $after = Dumper($sto->get_all_classes); + is($after, $before, "class listing is unchanged"); +} + +ok($tmptrack->mogadm("domain", "add", "hasclass"), "created hasclass domain"); +ok($tmptrack->mogadm("class", "add", "hasclass", "nodel"), "created nodel class"); +ok(!$tmptrack->mogadm("domain", "delete", "hasclass"), "didn't delete hasclass domain"); +ok($tmptrack->mogadm("class", "delete", "hasclass", "nodel"), "created nodel class"); 
+ok($tmptrack->mogadm("domain", "delete", "hasclass"), "didn't delete hasclass domain"); + ok($tmptrack->mogadm("domain", "add", "testdom"), "created test domain"); ok($tmptrack->mogadm("class", "add", "testdom", "1copy", "--mindevcount=1"), "created 1copy class in testdom"); ok($tmptrack->mogadm("class", "add", "testdom", "2copies", "--mindevcount=2"), "created 2copies class in testdom"); @@ -108,18 +160,22 @@ ok($tmptrack->mogadm("device", "add", "hostB", 4), "created dev4 on hostB"); { my $was = $be->{timeout}; # can't use local on phash :( $be->{timeout} = 10; - ok($be->do_request("do_monitor_round", {}), "waited for monitor") + ok($be->do_request("clear_cache", {}), "waited for monitor") + or die "Failed to wait for monitor"; + ok($be->do_request("clear_cache", {}), "waited for monitor") or die "Failed to wait for monitor"; $be->{timeout} = $was; } { my $fh = $mogc->new_file('no_content', "2copies"); + die "Error: " . $mogc->errstr unless $fh; ok(close($fh), "closed file"); } { my $fh = $mogc->new_file('no_content', "2copies"); + die "Error: " . $mogc->errstr unless $fh; ok(close($fh), "closed file"); } @@ -202,19 +258,21 @@ for (1..10) { is scalar($mogc->get_paths("file1copy")), 1, 'File is on 1 device'; - $mogc->update_class('2copies'); + ok($mogc->update_class('file1copy', '2copies'), "updated class to 2 copies"); # wait for it to replicate ok(try_for(10, sub { my @urls = $mogc->get_paths("file1copy"); my $nloc = @urls; - if ($nloc < 1) { + if ($nloc < 2) { diag("no_content still only on $nloc devices"); return 0; } return 1; }), "replicated to 2 paths"); + ok($mogc->update_class('file1copy', 'default'), "updated class to default"); + ok($mogc->delete("file1copy"), "deleted updateclass testfile file1copy") or die "Error: " . 
$mogc->errstr; } @@ -272,9 +330,6 @@ ok($tmptrack->mogadm("host", "add", "hostC", "--ip=$hostC_ip", "--status=alive") ok($tmptrack->mogadm("device", "add", "hostC", 5), "created dev5 on hostC"); ok($tmptrack->mogadm("device", "add", "hostC", 6), "created dev6 on hostC"); -# let it be discovered -sleep(3); # FIXME: make an explicit "rescan" or "remonitor" job to mogilefsd, just for test suite - ok($tmptrack->mogadm("device", "mark", "hostB", 3, "dead"), "marked device B/3 dead"); ok($tmptrack->mogadm("device", "mark", "hostB", 4, "dead"), "marked device B/4 dead"); @@ -318,11 +373,117 @@ foreach my $t (qw(file file_on file_to_delete)) { }), "table $t is empty"); } -sub try_for { - my ($tries, $code) = @_; - for (1..$tries) { - return 1 if $code->(); - sleep 1; +# Test some broken client modes. +{ + my $c = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001', + Timeout => 3); + die "Failed to connect to test tracker" unless $c; + # Pretend to upload a file, then tell the server weird things. + # Not trying to be defensable to all sorts of things, but ensuring we're + # safe against double close, bad destdev, etc. + print $c "create_open " + . "domain=testdom&fid=0&class=&multi_dest=1&key=fufufu\n"; + my $res = <$c>; + my $fidid; + ok($res =~ m/fid=(\d+)/, "bare create_open worked"); + $fidid = $1; + # Pretend we uploaded something. + print $c "create_close " + . "domain=testdom&fid=$fidid&devid=4&size=0&key=fufufu" + . "&path=http://127.0.1.2:7500/dev4/0/000/000/0000000$fidid.fid\n"; + my $res2 = <$c>; + ok($res2 =~ m/invalid_destdev/, "cannot upload to unlisted destdev"); + + # TODO: test double closing, etc. +} + +# give an explicit fid, to prevent bugs like the reason behind +# commit ac5534a0c3d046e660fa7581c9173857f182bd81 +# This is functionality is a bad idea otherwise. +{ + my $expfid = 2147483632; + my $opts = { fid => $expfid }; + my $fh = $mogc->new_file("explicit_fid", "1copy", 2, $opts); + die "Error: " . 
$mogc->errstr unless $fh; + ok((print $fh "hi" ), "wrote 2 bytes"); + ok(close($fh), "closed file"); + my $info = $mogc->file_info("explicit_fid"); + + is($info->{devcount}, 1, "devcount is 1"); + is($info->{fid}, $opts->{fid}, "explicit fid is correctly set"); +} + +{ + my $fh = $mogc->new_file("0", "1copy"); + ok((print $fh "zero\n"), "wrote to file"); + ok(close($fh), "closed file"); + + my $info = $mogc->file_info("0"); + is("HASH", ref($info), "file_info returned a hash"); + is("0", $info->{key}, "key matches 0"); + is("1copy", $info->{class}, "class matches for 0 key"); + + my @paths = $mogc->get_paths("0"); + is(1, scalar(@paths), "path returned for 0"); + + $mogc->rename("0", "zero"); + is($info->{fid}, $mogc->file_info("zero")->{fid}, "rename from 0"); + $mogc->rename("zero", "0"); + is($info->{fid}, $mogc->file_info("0")->{fid}, "rename to 0"); + $mogc->update_class("0", "2copies"); + is("2copies", $mogc->file_info("0")->{class}, "class updated for 0 key"); + + my $debug = $mogc->file_debug(key => "0"); + is($debug->{fid_fid}, $info->{fid}, "FID from debug matches"); + is($debug->{fid_dkey}, "0", "key from debug matches"); + + ok($mogc->delete("0"), "delete 0 works"); +} + +# ensure all workers can be stopped/started +{ + my $c = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001', Timeout => 3); + my @jobs = qw(fsck queryworker delete replicate reaper monitor job_master); + + foreach my $j (@jobs) { + ok(want($c, 0, $j), "shut down all $j"); + } + + # spawn job_master first to ensure delete/fsck/replicate can start + foreach my $j (reverse @jobs) { + ok(want($c, 1, $j), "start 1 $j"); } - return 0; } + +# list_keys with underscore +{ + my $c = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001', Timeout => 3); + + foreach my $k (qw(under_score under.score per%cent back\\slash)) { + my $fh = $mogc->new_file($k, '1copy'); + ok($fh, "got filehandle for $k"); + ok(close($fh), "created file $k"); + } + + # we only have one queryworker from the previous test, so 
no need to + # clear cache twice and wait on monitor after "mogadm settings set" + foreach my $cslk (qw(on off)) { + ok($tmptrack->mogadm("settings", "set", "case_sensitive_list_keys", $cslk), "case_sensitive_list_keys = $cslk"); + ok($be->do_request("clear_cache", {}), "cleared_cache"); + my @l = $mogc->list_keys("under_"); + is_deeply(['under_score', [ 'under_score' ]], \@l, "list_keys handled underscore properly (case-sensitive $cslk)"); + + @l = $mogc->list_keys("per%"); + is_deeply(['per%cent', [ 'per%cent' ]], \@l, "list_keys handled % properly (case-sensitive $cslk)"); + + @l = $mogc->list_keys("back\\"); + is_deeply(['back\slash', [ 'back\slash' ]], \@l, "list_keys handled \\ properly (case-sensitive $cslk)"); + } + + # restore default + $sto->set_server_setting('case_sensitive_list_keys', undef); +} + +ok($tmptrack->mogadm(qw(host mark hostA readonly)), "host state=readonly"); + +done_testing(); diff --git a/t/01-domain-class.t b/t/01-domain-class.t new file mode 100644 index 00000000..3f7f8cd4 --- /dev/null +++ b/t/01-domain-class.t @@ -0,0 +1,160 @@ +# -*-perl-*- + +use strict; +use warnings; +use Test::More; +use FindBin qw($Bin); + +use MogileFS::Server; +use MogileFS::Util qw(error_code); +use MogileFS::Test; +use MogileFS::Factory; +use MogileFS::Factory::Domain; +use MogileFS::Factory::Class; +use MogileFS::Domain; +use MogileFS::Class; + +use Data::Dumper qw/Dumper/; + +my $sto = eval { temp_store(); }; +if (!$sto) { + plan skip_all => "Can't create temporary test database: $@"; + exit 0; +} + +# Fetch the factories. +my $domfac = MogileFS::Factory::Domain->get_factory; +ok($domfac, "got a domain factory"); +my $classfac = MogileFS::Factory::Class->get_factory; +ok($classfac, "got a class factory"); + +# Ensure the inherited singleton is good. +ok($domfac != $classfac, "factories are not the same singleton"); + +{ + # Add in a test domain. 
+ my $dom = $domfac->set({ dmid => 1, namespace => 'toast'}); + ok($dom, "made a new domain object"); + is($dom->id, 1, "domain id is 1"); + is($dom->name, 'toast', 'domain namespace is toast'); + + # Add in a test class. + my $cls = $classfac->set({ classid => 1, dmid => 1, mindevcount => 3, + replpolicy => '', classname => 'fried'}); + ok($cls, "got a class object"); + is($cls->id, 1, "class id is 1"); + is($cls->name, 'fried', 'class name is fried'); + is(ref($cls->domain), 'MogileFS::Domain', + 'class can find a domain object'); +} + +# Add a few more classes and domains. +{ + my $dom2 = $domfac->set({ dmid => 2, namespace => 'harro' }); + $classfac->set({ classid => 1, dmid => 2, mindevcount => 2, + replpolicy => '', classname => 'red' }); + $classfac->set({ classid => 2, dmid => 2, mindevcount => 3, + replpolicy => 'MultipleHosts(2)', classname => 'green' }); + $classfac->set({ classid => 3, dmid => 2, mindevcount => 4, + replpolicy => 'MultipleHosts(5)', classname => 'blue' }); +} + +# Ensure the select and remove factory methods work. +{ + my $dom = $domfac->get_by_id(1); + is($dom->name, 'toast', 'got the right domain from get_by_id'); +} + +{ + my $dom = $domfac->get_by_name('harro'); + is($dom->id, 2, 'got the right domain from get_by_name'); +} + +{ + my @doms = $domfac->get_all; + is(scalar(@doms), 2, 'got two domains back from get_all'); + for (@doms) { + is(ref($_), 'MogileFS::Domain', 'and both are domains'); + } + isnt($doms[0]->id, $doms[1]->id, 'and both are not the same'); +} + +{ + my $dom = $domfac->get_by_name('harro'); + my $clsmap = $classfac->map_by_id($dom); + is(ref($clsmap), 'HASH', 'got a mapped class hash'); + is($clsmap->{2}->name, 'green', 'got the right class set'); + + $classfac->remove($clsmap->{2}); + + my $cls = $classfac->get_by_name($dom, 'green'); + ok(!$cls, "class removed from factory"); +} + +# Test the domain routines harder. 
+{ + my $dom = $domfac->get_by_name('harro'); + my @classes = $dom->classes; + # Magic "default" class is included + is(scalar(@classes), 3, 'found three classes'); + + ok($dom->class('blue'), 'found the blue class'); + ok(!$dom->class('fried'), 'did not find the fried class'); +} + +# Test the class routines harder. +{ + my $dom = $domfac->get_by_name('harro'); + my $cls = $dom->class('blue'); + my $polobj = $cls->repl_policy_obj; + ok($polobj, 'class can create policy object'); +} + +# Add a domain and two classes to the DB. +{ + my $domid = $sto->create_domain('foo'); + ok($domid, 'new domain stored in database: ' . $domid); + + my $clsid1 = $sto->create_class($domid, 'bar'); + my $clsid2 = $sto->create_class($domid, 'baz'); + is($clsid1, 1, 'new class1 stored in database'); + is($clsid2, 2, 'new class2 stored in database'); + + ok($sto->update_class_mindevcount(dmid => $domid, classid => $clsid2, + mindevcount => 3), 'can set mindevcount'); + ok($sto->update_class_replpolicy(dmid => $domid, classid => $clsid2, + replpolicy => 'MultipleHosts(6)'), 'can set replpolicy'); + ok($sto->update_class_name(dmid => $domid, classid => $clsid2, + classname => 'boo'), 'can rename class'); + ok($sto->update_class_hashtype(dmid => $domid, classid => $clsid2, + hashtype => 1), 'can set checksum type'); + ok($sto->update_class_hashtype(dmid => $domid, classid => $clsid2, + hashtype => undef), 'can unset checksum type'); +} + +{ + # Reload from the DB and confirm they came back the way they went in. + my %domains = $sto->get_all_domains; + ok(exists $domains{foo}, 'domain foo exists'); + is($domains{foo}, 1, 'and the id is 1'); + my @classes = $sto->get_all_classes; + is_deeply($classes[0], { + 'replpolicy' => undef, + 'dmid' => '1', + 'classid' => '1', + 'mindevcount' => '2', + 'classname' => 'bar', + 'hashtype' => undef, + }, 'class bar came back'); + # We edited class2 a bunch, make sure that all stuck. 
+ is_deeply($classes[1], { + 'replpolicy' => 'MultipleHosts(6)', + 'dmid' => '1', + 'classid' => '2', + 'mindevcount' => '3', + 'classname' => 'boo', + 'hashtype' => undef, + }, 'class baz came back as boo'); +} + +done_testing(); diff --git a/t/02-host-device.t b/t/02-host-device.t new file mode 100644 index 00000000..e95c83c1 --- /dev/null +++ b/t/02-host-device.t @@ -0,0 +1,175 @@ +# -*-perl-*- + +use strict; +use warnings; +use Test::More; +use FindBin qw($Bin); + +use MogileFS::Server; +use MogileFS::Util qw(error_code); +use MogileFS::Test; +use MogileFS::Factory; +use MogileFS::Factory::Host; +use MogileFS::Factory::Device; +use MogileFS::Host; +use MogileFS::Device; + +use Data::Dumper qw/Dumper/; + +my $sto = eval { temp_store(); }; +if (!$sto) { + plan skip_all => "Can't create temporary test database: $@"; + exit 0; +} + +# Fetch the factories. +my $hostfac = MogileFS::Factory::Host->get_factory; +ok($hostfac, "got a host factory"); +my $devfac = MogileFS::Factory::Device->get_factory; +ok($devfac, "got a device factory"); + +MogileFS::Config->set_config_no_broadcast("min_free_space", 100); + +# Ensure the inherited singleton is good. +ok($hostfac != $devfac, "factories are not the same singleton"); + +{ + # Test host. + my $host = $hostfac->set({ hostid => 1, hostname => 'foo', hostip => +'127.0.0.5', status => 'alive', http_port => 7500, observed_state => +'reachable'}); + ok($host, 'made a new host object'); + is($host->id, 1, 'host id is 1'); + is($host->name, 'foo', 'host name is foo'); + + # Test device. 
+ my $dev = $devfac->set({ devid => 1, hostid => 1, status => 'alive', +weight => 100, mb_total => 5000, mb_used => 300, mb_asof => 1295217165, +observed_state => 'writeable'}); + ok($dev, 'made a new dev object'); + is($dev->id, 1, 'dev id is 1'); + is($dev->host->name, 'foo', 'name of devs host is foo'); + ok($dev->can_delete_from, 'can_delete_from works'); + ok($dev->can_read_from, 'can_read_from works'); + ok($dev->should_get_new_files, 'should_get_new_files works'); + + # monitor needs to respect can_read_from, + # everything else respects should_read_from + { + foreach my $s (qw/down dead/) { + $host->{status} = $s; + ok(!$host->alive, "host is not alive when $s"); + ok(!$dev->can_read_from, "can_read_from for device fails when host is $s"); + ok(!$dev->should_read_from, "device should not be readable when host is $s"); + } + $host->{status} = "readonly"; + ok($dev->can_read_from, "device is readable from again"); + ok(! $dev->should_get_new_files, "device should not get new files"); + + $host->{status} = "alive"; + ok($dev->can_read_from, "device is readable from again"); + ok($dev->should_get_new_files, "device should get new files again"); + } + + # first ensure device status is respected + { + foreach my $s (qw/down dead/) { + $dev->{status} = $s; + ok(!$dev->should_read_from, "device is NOT readable when $s"); + } + foreach my $s (qw/readonly drain alive/) { + $dev->{status} = $s; + ok($dev->should_read_from, "device readable when $s"); + } + } + + # take host observed states into account for should_read_from + { + $host->{observed_state} = "unreachable"; + ok($dev->can_read_from, "device can be read from by monitor of unreachable"); + ok(! $dev->should_read_from, "device should not be read from by non-monitor workers"); + ok(! $dev->observed_readable, "device not readable"); + ok(! 
$dev->observed_writeable, "device not writeable"); + ok($dev->observed_unreachable, "device is unreachable"); + + $host->{observed_state} = "reachable"; + ok($dev->should_read_from, "device is readable again by non-monitor workers"); + ok($dev->observed_writeable, "device writable again"); + ok(! $dev->observed_unreachable, "device is reachable again"); + } + + # take device observed states into account for should_read_from + { + $dev->{observed_state} = "unreachable"; + ok(!$dev->should_read_from, "device should not be read from when observed unreachable"); + foreach my $s (qw/readable writeable/) { + $dev->{observed_state} = $s; + ok($dev->should_read_from, "device should be read from when observed $s"); + } + } + + $hostfac->remove($host); + $devfac->remove($dev); +} + +# Might be able to skip the factory tests, as domain/class cover those. + +{ + # Add a host and two devices to the DB. + my $hostid = $sto->create_host('foo', '127.0.0.7'); + is($hostid, 1, 'new host got id 1'); + + # returns 1 instead of the devid :( + # since this it the only place which doesn't autogenerate its id. + ok($sto->create_device(1, $hostid, 'alive'), 'created dev1'); + ok($sto->create_device(2, $hostid, 'down'), 'created dev2'); + + # Update host details to DB and ensure they stick. + ok($sto->update_host($hostid, { http_port => 6500, http_get_port => 6501 }), + 'updated host DB entry'); + # Update device details in DB and ensure they stick. + ok($sto->update_device(1, { mb_total => 150, mb_used => 8 }), + 'updated dev1 DB entry'); + ok($sto->update_device(2, { mb_total => 100, mb_used => 3, + status => 'dead' }), 'updated dev2 DB entry'); + + # Test duplication errors. +} + +{ + # Reload from DB and confirm they match what we had before. 
+ my @hosts = $sto->get_all_hosts; + my @devs = $sto->get_all_devices; + + is_deeply($hosts[0], { + 'http_get_port' => 6501, + 'status' => 'down', + 'http_port' => '6500', + 'hostip' => '127.0.0.7', + 'hostname' => 'foo', + 'hostid' => '1', + 'altip' => undef, + 'altmask' => undef + }, 'host is as expected'); + + is_deeply($devs[0], { + 'mb_total' => 150, + 'mb_used' => 8, + 'status' => 'alive', + 'devid' => '1', + 'weight' => '100', + 'mb_asof' => undef, + 'hostid' => '1' + }, 'dev1 is as expected'); + is_deeply($devs[1], { + 'mb_total' => 100, + 'mb_used' => 3, + 'status' => 'dead', + 'devid' => '2', + 'weight' => '100', + 'mb_asof' => undef, + 'hostid' => '1' + }, 'dev2 is as expected'); +} + +done_testing(); diff --git a/t/10-weighting.t b/t/10-weighting.t index 3bee554d..01734af2 100644 --- a/t/10-weighting.t +++ b/t/10-weighting.t @@ -26,9 +26,7 @@ find_mogclient_or_skip(); # etc my $sto = eval { temp_store(); }; -if ($sto) { - plan tests => 16; -} else { +if (!$sto) { plan skip_all => "Can't create temporary test database: $@"; exit 0; } @@ -76,11 +74,17 @@ ok($tmptrack->mogadm("host", "add", "hostB", "--ip=127.0.1.2", "--status=alive") ok($tmptrack->mogadm("device", "add", "hostA", 1), "created dev1 on hostA"); ok($tmptrack->mogadm("device", "add", "hostB", 2), "created dev2 on hostB"); +# just ensure the "set_weight" command doesn't ERR out +ok($tmptrack->mogadm("device", "modify", "hostA", 1, "--weight=50"), "set dev1 weight=50 on hostA"); +ok($tmptrack->mogadm("device", "modify", "hostB", 2, "--weight=50"), "set dev2 weight=50 on hostB"); + # wait for monitor { my $was = $be->{timeout}; # can't use local on phash :( $be->{timeout} = 10; - ok($be->do_request("do_monitor_round", {}), "waited for monitor") + ok($be->do_request("clear_cache", {}), "waited for monitor") + or die "Failed to wait for monitor"; + ok($be->do_request("clear_cache", {}), "waited for monitor") or die "Failed to wait for monitor"; $be->{timeout} = $was; } @@ -116,3 +120,4 @@ for 
(1..100) { ok($stats{1} < 15, "Device 1 should get roughly 5% of traffic, got: $stats{1}"); ok($stats{2} > 80, "Device 2 should get roughly 95% of traffic, got: $stats{2}"); +done_testing(); diff --git a/t/20-filepaths.t b/t/20-filepaths.t index 5c9b1c61..e76e1976 100644 --- a/t/20-filepaths.t +++ b/t/20-filepaths.t @@ -27,9 +27,7 @@ plan skip_all => "Filepaths plugin has been separated from the server, a bit of exit 0; my $sto = eval { temp_store(); }; -if ($sto) { - plan tests => 19; -} else { +if (!$sto) { plan skip_all => "Can't create temporary test database: $@"; exit 0; } @@ -70,7 +68,7 @@ ok($mogc->filepaths_enable, "Filepaths enabled successfully"); { my $was = $be->{timeout}; # can't use local on phash :( $be->{timeout} = 10; - ok($be->do_request("do_monitor_round", {}), "waited for monitor") + ok($be->do_request("clear_cache", {}), "waited for monitor") or die "Failed to wait for monitor"; $be->{timeout} = $was; } @@ -138,4 +136,6 @@ my $data = "My test file.\n" x 1024; ok($mogc->filepaths_disable, "Filepaths disabled successfully"); +done_testing(); + # vim: filetype=perl diff --git a/t/30-rebalance.t b/t/30-rebalance.t index d25daafd..c6b6d990 100644 --- a/t/30-rebalance.t +++ b/t/30-rebalance.t @@ -12,9 +12,7 @@ use MogileFS::Test; find_mogclient_or_skip(); my $sto = eval { temp_store(); }; -if ($sto) { - plan tests => 40; -} else { +if (!$sto) { plan skip_all => "Can't create temporary test database: $@"; exit 0; } @@ -78,18 +76,13 @@ ok($tmptrack->mogadm("host", "add", "hostC", "--ip=$hostC_ip", "--status=alive") ok($tmptrack->mogadm("device", "add", "hostC", 5), "created dev5 on hostC"); ok($tmptrack->mogadm("device", "add", "hostC", 6), "created dev6 on hostC"); -ok($tmptrack->mogadm("device", "mark", "hostA", 1, "alive"), "dev1 alive"); -ok($tmptrack->mogadm("device", "mark", "hostA", 2, "alive"), "dev2 alive"); -ok($tmptrack->mogadm("device", "mark", "hostB", 3, "alive"), "dev3 alive"); -ok($tmptrack->mogadm("device", "mark", "hostB", 4, 
"alive"), "dev4 alive"); -ok($tmptrack->mogadm("device", "mark", "hostC", 5, "alive"), "dev5 alive"); -ok($tmptrack->mogadm("device", "mark", "hostC", 6, "alive"), "dev6 alive"); - # wait for monitor { my $was = $be->{timeout}; # can't use local on phash :( $be->{timeout} = 10; - ok($be->do_request("do_monitor_round", {}), "waited for monitor") + ok($be->do_request("clear_cache", {}), "waited for monitor") + or die "Failed to wait for monitor"; + ok($be->do_request("clear_cache", {}), "waited for monitor") or die "Failed to wait for monitor"; $be->{timeout} = $was; } @@ -108,17 +101,22 @@ for my $n (1..$n_files) { pass("Created a ton of files"); # wait for replication to go down +# We need to wait for BOTH queues to be empty before we continue to rebalance. +# If there is anything left w/ a devid that we rebalance away from, there would +# be a failure when the HTTP delete happens simultaenously to the replication. +# This will manifest as subtest 48 failing often... { my $iters = 30; - my $to_repl_rows; + my ($to_repl_rows, $to_queue_rows); while ($iters) { $iters--; $to_repl_rows = $dbh->selectrow_array("SELECT COUNT(*) FROM file_to_replicate"); - last if ! 
$to_repl_rows; - diag("Files to replicate: $to_repl_rows"); + $to_queue_rows = $dbh->selectrow_array("SELECT COUNT(*) FROM file_to_queue"); + last if $to_repl_rows eq 0 && $to_queue_rows eq 0; + diag("Files to replicate: file_to_replicate=$to_repl_rows file_to_queue=$to_queue_rows"); sleep 1; } - die "Failed to replicate all $n_files files" if $to_repl_rows; + die "Failed to replicate all $n_files files" if $to_repl_rows || $to_queue_rows; pass("Replicated all $n_files files"); } @@ -127,15 +125,21 @@ use MogileFS::Device; use MogileFS::Host; use MogileFS::Config; use MogileFS::Rebalance; +use MogileFS::Factory::Host; +use MogileFS::Factory::Device; use Data::Dumper qw/Dumper/; -my @devs = MogileFS::Device->devices; -my @hosts = MogileFS::Host->hosts; +my $dfac = MogileFS::Factory::Device->get_factory; +my $hfac = MogileFS::Factory::Host->get_factory; + +map { $hfac->set($_) } $sto->get_all_hosts; +map { $dfac->set($_) } $sto->get_all_devices; +my @devs = $dfac->get_all; ### Hacks to make tests work :/ $MogileFS::Config::skipconfig = 1; MogileFS::Config->load_config; -for my $h (@hosts) { +for my $h ($hfac->get_all) { print "hostid: ", $h->id, " name: ", $h->hostname, "\n"; $h->{observed_state} = "reachable"; } @@ -182,6 +186,14 @@ if ($@) { #print Dumper($saved_state), "\n"; #print Dumper($devfids2), "\n"; +# ensure all devices are still marked alive. +ok($tmptrack->mogadm("device", "mark", "hostA", 1, "alive"), "dev1 alive"); +ok($tmptrack->mogadm("device", "mark", "hostA", 2, "alive"), "dev2 alive"); +ok($tmptrack->mogadm("device", "mark", "hostB", 3, "alive"), "dev3 alive"); +ok($tmptrack->mogadm("device", "mark", "hostB", 4, "alive"), "dev4 alive"); +ok($tmptrack->mogadm("device", "mark", "hostC", 5, "alive"), "dev5 alive"); +ok($tmptrack->mogadm("device", "mark", "hostC", 6, "alive"), "dev6 alive"); + use MogileFS::Admin; my $moga = MogileFS::Admin->new( domain => "testdom", @@ -190,6 +202,21 @@ my $moga = MogileFS::Admin->new( ok(! 
defined $moga->rebalance_stop); my $res; + +# Quickly test the "no dupes" policy. +# ensures that source devices are properly filtered. +my $rebal_pol_dupes = "from_devices=1"; +ok($res = $moga->rebalance_set_policy($rebal_pol_dupes)); +if (! defined $res) { + print "Admin error: ", $moga->errstr, "\n"; +} +ok($res = $moga->rebalance_test); +{ + for my $dev (sort split /,/, $res->{ddevs}) { + ok($dev != 1); + } +} + ok($res = $moga->rebalance_set_policy($rebal_pol)); if (! defined $res) { print "Admin error: ", $moga->errstr, "\n"; @@ -209,19 +236,23 @@ if ($res) { # print "Start results: ", Dumper($res), "\n\n"; } -sleep 5; +# This sleep should be replaced with a "rebalance status" check to confirm +# it's been started. Otherwise there's up to two seconds where JobMaster might +# not have seen the start request yet. Lowered the sleep from 5 to 3. +sleep 3; { my $iters = 30; - my $to_repl_rows; + my ($to_repl_rows, $to_queue_rows); while ($iters) { $iters--; - $to_repl_rows = $dbh->selectrow_array("SELECT COUNT(*) FROM file_to_queue"); - last if ! 
$to_repl_rows; - diag("Files to rebalance: $to_repl_rows"); + $to_repl_rows = $dbh->selectrow_array("SELECT COUNT(*) FROM file_to_replicate"); + $to_queue_rows = $dbh->selectrow_array("SELECT COUNT(*) FROM file_to_queue"); + last if $to_repl_rows eq 0 && $to_queue_rows eq 0; + diag("Files to rebalance: file_to_replicate=$to_repl_rows file_to_queue=$to_queue_rows"); sleep 1; } - die "Failed to rebalance all files" if $to_repl_rows; + die "Failed to rebalance all files" if $to_repl_rows || $to_queue_rows; pass("Replicated all files"); } @@ -236,11 +267,4 @@ sleep 5; # - fiddle mbused/mbfree for devices and test the percentages # - test move limits (count, size, etc) -sub try_for { - my ($tries, $code) = @_; - for (1..$tries) { - return 1 if $code->(); - sleep 1; - } - return 0; -} +done_testing(); diff --git a/t/40-httpfile.t b/t/40-httpfile.t new file mode 100644 index 00000000..ddcfa51a --- /dev/null +++ b/t/40-httpfile.t @@ -0,0 +1,133 @@ +# -*-perl-*- + +use strict; +use warnings; +use Test::More; +use FindBin qw($Bin); + +use MogileFS::Server; +use MogileFS::Util qw(error_code); +use MogileFS::Test; + +find_mogclient_or_skip(); + +my $sto = eval { temp_store(); }; +if (!$sto) { + plan skip_all => "Can't create temporary test database: $@"; + exit 0; +} + +my $dbh = $sto->dbh; +my $rv; + +my ($hostA_ip) = (qw/127.0.1.1/); + +use File::Temp; +my %mogroot; +$mogroot{1} = File::Temp::tempdir( CLEANUP => 1 ); +my $dev2host = { 1 => 1 }; +foreach (sort { $a <=> $b } keys %$dev2host) { + my $root = $mogroot{$dev2host->{$_}}; + mkdir("$root/dev$_") or die "Failed to create dev$_ dir: $!"; +} + +my $ms1 = create_mogstored($hostA_ip, $mogroot{1}); + +while (! 
-e "$mogroot{1}/dev1/usage") { + print "Waiting on usage...\n"; + sleep 1; +} + +my $tmptrack = create_temp_tracker($sto); +ok($tmptrack); + +my $mogc = MogileFS::Client->new( + domain => "testdom", + hosts => [ "127.0.0.1:7001" ], + ); +my $be = $mogc->{backend}; # gross, reaching inside of MogileFS::Client + +# test some basic commands to backend +ok($be->do_request("test", {}), "test ping worked"); + +ok($tmptrack->mogadm("domain", "add", "testdom"), "created test domain"); +ok($tmptrack->mogadm("class", "add", "testdom", "1copy", "--mindevcount=1"), "created 1copy class in testdom"); + +ok($tmptrack->mogadm("host", "add", "hostA", "--ip=$hostA_ip", "--status=alive"), "created hostA"); + +ok($tmptrack->mogadm("device", "add", "hostA", 1), "created dev1 on hostA"); + +# wait for monitor +{ + my $was = $be->{timeout}; # can't use local on phash :( + $be->{timeout} = 10; + ok($be->do_request("clear_cache", {}), "waited for monitor") + or die "Failed to wait for monitor"; + ok($be->do_request("clear_cache", {}), "waited for monitor") + or die "Failed to wait for monitor"; + $be->{timeout} = $was; +} + +# create a file +my $fh = $mogc->new_file("file", "1copy") + or die "Failed to create file: " . 
$mogc->errstr; +my $data = "DATA"; +print $fh $data; +close($fh) or die "Failed to close file"; +my @paths = $mogc->get_paths("file"); + +use MogileFS::Device; +use MogileFS::Host; +use MogileFS::Config; +use MogileFS::Rebalance; +use MogileFS::Factory::Host; +use MogileFS::Factory::Device; +use Digest::MD5 qw/md5/; + +my $dfac = MogileFS::Factory::Device->get_factory; +my $hfac = MogileFS::Factory::Host->get_factory; + +map { $hfac->set($_) } $sto->get_all_hosts; +map { $dfac->set($_) } $sto->get_all_devices; +my @devs = $dfac->get_all; + +### Hacks to make tests work :/ +$MogileFS::Config::skipconfig = 1; +MogileFS::Config->load_config; + +my $file = MogileFS::HTTPFile->at($paths[0]); +my $md5_digest; + +$md5_digest = $file->digest_mgmt("MD5", sub {}); +ok($md5_digest eq md5("DATA"), "mgmt only"); +my $cb_called = 0; +$md5_digest = $file->digest_http("MD5", sub { $cb_called++ }); +ok(1 == $cb_called, "ping callback called"); +ok($md5_digest eq md5("DATA"), "http only"); + +$md5_digest = $file->digest("MD5", sub {}); +ok($md5_digest eq md5("DATA"), "mgmt or http"); +ok(length($md5_digest) == 16, "MD5 is 16 bytes (128 bits)"); + +my $size = 100 * 1024 * 1024; +$fh = $mogc->new_file("largefile", "1copy") + or die "Failed to create largefile: " . 
$mogc->errstr; +$data = "LARGE" x 20; +my $expect = Digest::MD5->new; +foreach my $i (1..(1024 * 1024)) { + $expect->add($data); + print $fh $data or die "failed to write chunk $i for largefile"; +} +close($fh) or die "Failed to close largefile"; +$expect = $expect->digest; +@paths = $mogc->get_paths("largefile"); +$file = MogileFS::HTTPFile->at($paths[0]); +ok($size == $file->size, "big file size match $size"); +ok($file->digest_mgmt('MD5', sub {}) eq $expect, "digest_mgmt('MD5') on big file"); +ok($file->digest_http('MD5', sub {}) eq $expect, "digest_http('MD5') on big file"); + +ok($file->delete, "file deleted"); +is(-1, $file->digest_http('MD5', sub {}), "digest_http detected missing"); +is(-1, $file->digest_mgmt('MD5', sub {}), "digest_mgmt detected missing"); + +done_testing(); diff --git a/t/50-checksum.t b/t/50-checksum.t new file mode 100644 index 00000000..8e7b0aa8 --- /dev/null +++ b/t/50-checksum.t @@ -0,0 +1,454 @@ +# -*-perl-*- +use strict; +use warnings; +use Test::More; +use FindBin qw($Bin); +use MogileFS::Server; +use MogileFS::Test; +use HTTP::Request; +find_mogclient_or_skip(); + +my $sto = eval { temp_store(); }; +if (!$sto) { + plan skip_all => "Can't create temporary test database: $@"; + exit 0; +} + +use File::Temp; +my %mogroot; +$mogroot{1} = File::Temp::tempdir( CLEANUP => 1 ); +$mogroot{2} = File::Temp::tempdir( CLEANUP => 1 ); +my $dev2host = { 1 => 1, 2 => 2, }; +foreach (sort { $a <=> $b } keys %$dev2host) { + my $root = $mogroot{$dev2host->{$_}}; + mkdir("$root/dev$_") or die "Failed to create dev$_ dir: $!"; +} + +my $ms1 = create_mogstored("127.0.1.1", $mogroot{1}); +ok($ms1, "got mogstored1"); +my $ms2 = create_mogstored("127.0.1.2", $mogroot{2}); +ok($ms1, "got mogstored2"); + +try_for(30, sub { + print "Waiting on usage...\n"; + -e "$mogroot{1}/dev1/usage" && -e "$mogroot{2}/dev2/usage"; +}); + +my $tmptrack = create_temp_tracker($sto); +ok($tmptrack); + +my $admin = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001'); +$admin 
or die "failed to create admin socket: $!"; +my $moga = MogileFS::Admin->new(hosts => [ "127.0.0.1:7001" ]); +my $mogc = MogileFS::Client->new( + domain => "testdom", + hosts => [ "127.0.0.1:7001" ], + ); +my $be = $mogc->{backend}; # gross, reaching inside of MogileFS::Client + +# test some basic commands to backend +ok($tmptrack->mogadm("domain", "add", "testdom"), "created test domain"); +ok($tmptrack->mogadm("class", "add", "testdom", "2copies", "--mindevcount=2"), "created 2copies class in testdom"); +ok($tmptrack->mogadm("class", "add", "testdom", "1copy", "--mindevcount=1"), "created 1copy class in testdom"); + +ok($tmptrack->mogadm("host", "add", "hostA", "--ip=127.0.1.1", "--status=alive"), "created hostA"); +ok($tmptrack->mogadm("host", "add", "hostB", "--ip=127.0.1.2", "--status=alive"), "created hostB"); + +ok($tmptrack->mogadm("device", "add", "hostA", 1), "created dev1 on hostA"); +ok($tmptrack->mogadm("device", "add", "hostB", 2), "created dev2 on hostB"); + +sub wait_for_monitor { + my $be = shift; + my $was = $be->{timeout}; # can't use local on phash :( + $be->{timeout} = 10; + ok($be->do_request("clear_cache", {}), "waited for monitor") + or die "Failed to wait for monitor"; + ok($be->do_request("clear_cache", {}), "waited for monitor") + or die "Failed to wait for monitor"; + $be->{timeout} = $was; +} + +sub full_fsck { + my $tmptrack = shift; + + ok($tmptrack->mogadm("fsck", "stop"), "stop fsck"); + ok($tmptrack->mogadm("fsck", "clearlog"), "clear fsck log"); + ok($tmptrack->mogadm("fsck", "reset"), "reset fsck"); + ok($tmptrack->mogadm("fsck", "start"), "started fsck"); +} + +wait_for_monitor($be); + +my ($req, $rv, %opts, @paths, @fsck_log); +my $ua = LWP::UserAgent->new; + +use Data::Dumper; +use Digest::MD5 qw/md5_hex/; + +# verify upload checksum +{ + my $key = "ok"; + %opts = ( domain => "testdom", class => "1copy", key => $key ); + $rv = $be->do_request("create_open", \%opts); + %opts = %$rv; + ok($rv && $rv->{path}, "create_open 
succeeded"); + $req = HTTP::Request->new(PUT => $rv->{path}); + $req->content("blah"); + $rv = $ua->request($req); + ok($rv->is_success, "PUT successful"); + $opts{key} = $key; + $opts{domain} = "testdom"; + $opts{checksum} = "MD5:".md5_hex('blah'); + $opts{checksumverify} = 1; + $rv = $be->do_request("create_close", \%opts); + ok($rv, "checksum verified successfully"); + is($sto->get_checksum($opts{fid}), undef, "checksum not saved"); + ok($mogc->file_info($key), "file_info($key) is sane"); +} + +# corrupted upload checksum fails +{ + my $key = 'corrupt'; + %opts = ( domain => "testdom", class => "1copy", key => $key ); + $rv = $be->do_request("create_open", \%opts); + %opts = %$rv; + ok($rv && $rv->{path}, "create_open succeeded"); + $req = HTTP::Request->new(PUT => $rv->{path}); + $req->content("blah"); + $rv = $ua->request($req); + ok($rv->is_success, "PUT successful"); + $opts{key} = $key; + $opts{domain} = "testdom"; + + $opts{checksumverify} = 1; + $opts{checksum} = "MD5:".md5_hex('fail'); + $rv = $be->do_request("create_close", \%opts); + ok(!defined($rv), "checksum verify noticed mismatch"); + my $hex = md5_hex('blah'); + is('checksum_mismatch', $be->{lasterr}, "error code is correct"); + ok($be->{lasterrstr} =~ /actual: MD5:$hex;/, "error message shows actual:"); + is($sto->get_checksum($opts{fid}), undef, "checksum not saved"); + is($mogc->file_info($key), undef, "$key not uploaded"); +} + +# enable saving MD5 checksums in "2copies" class +{ + %opts = ( domain => "testdom", class => "2copies", + hashtype => "MD5", mindevcount => 2 ); + ok($be->do_request("update_class", \%opts), "update class"); + wait_for_monitor($be); +} + +# save new row to checksum table +{ + my $key = 'savecksum'; + + want($admin, 0, "replicate"); + + %opts = ( domain => "testdom", class => "2copies", key => $key ); + $rv = $be->do_request("create_open", \%opts); + %opts = %$rv; + ok($rv && $rv->{path}, "create_open succeeded"); + $req = HTTP::Request->new(PUT => $rv->{path}); + 
$req->content("blah"); + $rv = $ua->request($req); + ok($rv->is_success, "PUT successful"); + $opts{key} = $key; + $opts{domain} = "testdom"; + $opts{checksum} = "MD5:".md5_hex('blah'); + $opts{checksumverify} = 1; + $rv = $be->do_request("create_close", \%opts); + ok($rv, "checksum verified successfully"); + my $row = $sto->get_checksum($opts{fid}); + ok($row, "checksum saved"); + my $info = $mogc->file_info($key); + ok($info, "file_info($key) is sane"); + is($info->{checksum}, "MD5:".md5_hex('blah'), 'checksum shows up'); + is($sto->delete_checksum($info->{fid}), 1, "$key checksum row deleted"); + $info = $mogc->file_info($key); + is($info->{checksum}, "MISSING", 'checksum is MISSING after delete'); + + want($admin, 1, "replicate"); + + # wait for replicate to recreate checksum + try_for(30, sub { + @paths = $mogc->get_paths($key); + scalar(@paths) != 1; + }); + is(scalar(@paths), 2, "replicate successfully with good checksum"); + + $info = $mogc->file_info($key); + is($info->{checksum}, "MD5:".md5_hex('blah'), 'checksum recreated on repl'); +} + +# flip checksum classes around +{ + my @classes; + %opts = ( domain => "testdom", class => "1copy", mindevcount => 1 ); + + $opts{hashtype} = "NONE"; + ok($be->do_request("update_class", \%opts), "update class"); + @classes = grep { $_->{classname} eq '1copy' } $sto->get_all_classes; + is($classes[0]->{hashtype}, undef, "hashtype unset"); + + $opts{hashtype} = "MD5"; + ok($be->do_request("update_class", \%opts), "update class"); + @classes = grep { $_->{classname} eq '1copy' } $sto->get_all_classes; + is($classes[0]->{hashtype}, 1, "hashtype is 1 (MD5)"); + + $opts{hashtype} = "NONE"; + ok($be->do_request("update_class", \%opts), "update class"); + @classes = grep { $_->{classname} eq '1copy' } $sto->get_all_classes; + is($classes[0]->{hashtype}, undef, "hashtype unset"); +} + +# save checksum on replicate, client didn't care to provide one +{ + my $key = 'lazycksum'; + + want($admin, 0, "replicate"); + + my $fh = 
$mogc->new_file($key, "2copies"); + print $fh "lazy"; + ok(close($fh), "closed file"); + my $info = $mogc->file_info($key); + is($info->{checksum}, 'MISSING', 'checksum is MISSING'); + + want($admin, 1, "replicate"); + + try_for(30, sub { + @paths = $mogc->get_paths($key); + scalar(@paths) != 1; + }); + is(scalar(@paths), 2, "replicate successfully with good checksum"); + + $info = $mogc->file_info($key); + is($info->{checksum}, "MD5:".md5_hex("lazy"), 'checksum is set after repl'); +} + +# fsck recreates checksum when missing +{ + my $key = 'lazycksum'; + my $info = $mogc->file_info($key); + $sto->delete_checksum($info->{fid}); + $info = $mogc->file_info($key); + is($info->{checksum}, "MISSING", "checksum is missing"); + full_fsck($tmptrack); + + try_for(30, sub { + $info = $mogc->file_info($key); + $info->{checksum} ne "MISSING"; + }); + is($info->{checksum}, "MD5:".md5_hex("lazy"), 'checksum is set after fsck'); + + @fsck_log = $sto->fsck_log_rows; + is(scalar(@fsck_log), 1, "fsck log has one row"); + is($fsck_log[0]->{fid}, $info->{fid}, "fid matches in fsck log"); + is($fsck_log[0]->{evcode}, "NSUM", "missing checksum logged"); +} + +# fsck fixes a corrupt file +{ + my $key = 'lazycksum'; + my $info = $mogc->file_info($key); + @paths = $mogc->get_paths($key); + is(scalar(@paths), 2, "2 paths for lazycksum"); + $req = HTTP::Request->new(PUT => $paths[0]); + $req->content("LAZY"); + $rv = $ua->request($req); + ok($rv->is_success, "upload to corrupt a file successful"); + is($ua->get($paths[0])->content, "LAZY", "file successfully corrupted"); + is($ua->get($paths[1])->content, "lazy", "paths[1] not corrupted"); + + full_fsck($tmptrack); + + try_for(30, sub { + @fsck_log = $sto->fsck_log_rows; + scalar(@fsck_log) != 0; + }); + + is(scalar(@fsck_log), 1, "fsck log has one row"); + is($fsck_log[0]->{fid}, $info->{fid}, "fid matches in fsck log"); + is($fsck_log[0]->{evcode}, "REPL", "repl for mismatched checksum logged"); + + try_for(30, sub { + @paths = 
$mogc->get_paths($key); + scalar(@paths) >= 2; + }); + + is(scalar(@paths), 2, "2 paths for key after replication"); + is($ua->get($paths[0])->content, "lazy", "paths[0] is correct"); + is($ua->get($paths[1])->content, "lazy", "paths[1] is correct"); + $info = $mogc->file_info($key); + is($info->{checksum}, "MD5:".md5_hex("lazy"), 'checksum unchanged after fsck'); +} + +# fsck notices when all files are corrupt +{ + my $key = 'lazycksum'; + my $info = $mogc->file_info($key); + @paths = $mogc->get_paths($key); + is(scalar(@paths), 2, "2 paths for lazycksum"); + + $req = HTTP::Request->new(PUT => $paths[0]); + $req->content("0000"); + $rv = $ua->request($req); + ok($rv->is_success, "upload to corrupt a file successful"); + is($ua->get($paths[0])->content, "0000", "successfully corrupted"); + + $req = HTTP::Request->new(PUT => $paths[1]); + $req->content("1111"); + $rv = $ua->request($req); + ok($rv->is_success, "upload to corrupt a file successful"); + is($ua->get($paths[1])->content, "1111", "successfully corrupted"); + + full_fsck($tmptrack); + + try_for(30, sub { + @fsck_log = $sto->fsck_log_rows; + scalar(@fsck_log) != 0; + }); + + is(scalar(@fsck_log), 1, "fsck log has one row"); + is($fsck_log[0]->{fid}, $info->{fid}, "fid matches in fsck log"); + is($fsck_log[0]->{evcode}, "BSUM", "BSUM logged"); + + @paths = $mogc->get_paths($key); + is(scalar(@paths), 2, "2 paths for checksum"); + @paths = sort( map { $ua->get($_)->content } @paths); + is(join(', ', @paths), "0000, 1111", "corrupted content preserved"); +} + +# reuploaded checksum row clobbers old checksum +{ + my $key = 'lazycksum'; + my $info = $mogc->file_info($key); + + ok($sto->get_checksum($info->{fid}), "old checksum row exists"); + + my $fh = $mogc->new_file($key, "2copies"); + print $fh "HAZY"; + ok(close($fh), "closed replacement file (lazycksum => HAZY)"); + + try_for(30, sub { ! 
$sto->get_checksum($info->{fid}); }); + is($sto->get_checksum($info->{fid}), undef, "old checksum is gone"); +} + +# completely corrupted files with no checksum row +{ + my $key = 'lazycksum'; + try_for(30, sub { + @paths = $mogc->get_paths($key); + scalar(@paths) >= 2; + }); + is(scalar(@paths), 2, "replicated succesfully"); + + my $info = $mogc->file_info($key); + is($info->{checksum}, "MD5:".md5_hex("HAZY"), "checksum created on repl"); + + $req = HTTP::Request->new(PUT => $paths[0]); + $req->content("MAYB"); + $rv = $ua->request($req); + ok($rv->is_success, "upload to corrupt a file successful"); + is($ua->get($paths[0])->content, "MAYB", "successfully corrupted (MAYB)"); + + $req = HTTP::Request->new(PUT => $paths[1]); + $req->content("CRZY"); + $rv = $ua->request($req); + ok($rv->is_success, "upload to corrupt a file successful"); + is($ua->get($paths[1])->content, "CRZY", "successfully corrupted (CRZY)"); + + is($sto->delete_checksum($info->{fid}), 1, "nuke new checksum"); + $info = $mogc->file_info($key); + is($info->{checksum}, "MISSING", "checksum is MISSING"); + + full_fsck($tmptrack); + + try_for(30, sub { + @fsck_log = $sto->fsck_log_rows; + scalar(@fsck_log) != 0; + }); + + is(scalar(@fsck_log), 1, "fsck log has one row"); + is($fsck_log[0]->{fid}, $info->{fid}, "fid matches in fsck log"); + is($fsck_log[0]->{evcode}, "BSUM", "BSUM logged"); +} + +# disable MD5 checksums in "2copies" class +{ + %opts = ( domain => "testdom", class => "2copies", + hashtype => "NONE", mindevcount => 2 ); + ok($be->do_request("update_class", \%opts), "update class"); + wait_for_monitor($be); +} + +# use fsck_checksum=MD5 instead of per-class checksums +{ + my $key = 'lazycksum'; + my $info = $mogc->file_info($key); + $sto->delete_checksum($info->{fid}); + + ok($tmptrack->mogadm("settings", "set", "fsck_checksum", "MD5"), "enable fsck_checksum=MD5"); + wait_for_monitor($be); + full_fsck($tmptrack); + + try_for(30, sub { + @fsck_log = $sto->fsck_log_rows; + 
scalar(@fsck_log) != 0; + }); + is(scalar(@fsck_log), 1, "fsck log has one row"); + is($fsck_log[0]->{fid}, $info->{fid}, "fid matches in fsck log"); + is($fsck_log[0]->{evcode}, "MSUM", "MSUM logged"); +} + +# ensure server setting is visible +use MogileFS::Admin; +{ + my $settings = $moga->server_settings; + is($settings->{fsck_checksum}, 'MD5', "fsck_checksum server setting visible"); +} + +use MogileFS::Config; + +# disable checksumming entirely, regardless of class setting +{ + %opts = ( domain => "testdom", class => "2copies", + hashtype => "MD5", mindevcount => 2 ); + ok($be->do_request("update_class", \%opts), "update class"); + wait_for_monitor($be); + + ok($tmptrack->mogadm("settings", "set", "fsck_checksum", "off"), "set fsck_checksum=off"); + wait_for_monitor($be); + my $settings = $moga->server_settings; + is($settings->{fsck_checksum}, 'off', "fsck_checksum server setting visible"); + full_fsck($tmptrack); + my $nr; + try_for(1000, sub { + $nr = $sto->file_queue_length(FSCK_QUEUE); + $nr eq '0'; + }); + is($nr, '0', "fsck finished"); + @fsck_log = $sto->fsck_log_rows; + is(scalar(@fsck_log), 0, "fsck log is empty with fsck_checksum=off"); +} + +# set fsck_checksum=class and ensure that works again +{ + my $info = $mogc->file_info('lazycksum'); + ok($tmptrack->mogadm("settings", "set", "fsck_checksum", "class"), "set fsck_checksum=class"); + wait_for_monitor($be); + my $settings = $moga->server_settings; + ok(! 
defined($settings->{fsck_checksum}), "fsck_checksum=class server setting hidden (default)"); + full_fsck($tmptrack); + + try_for(30, sub { + @fsck_log = $sto->fsck_log_rows; + scalar(@fsck_log) != 0; + }); + + is(scalar(@fsck_log), 1, "fsck log has one row"); + is($fsck_log[0]->{fid}, $info->{fid}, "fid matches in fsck log"); + is($fsck_log[0]->{evcode}, "BSUM", "BSUM logged"); +} + +done_testing(); diff --git a/t/51-checksum_class_change.t b/t/51-checksum_class_change.t new file mode 100644 index 00000000..cea0b63c --- /dev/null +++ b/t/51-checksum_class_change.t @@ -0,0 +1,139 @@ +# -*-perl-*- +use strict; +use warnings; +use Test::More; +use FindBin qw($Bin); +use MogileFS::Server; +use MogileFS::Test; +use HTTP::Request; +find_mogclient_or_skip(); +use MogileFS::Admin; + +my $sto = eval { temp_store(); }; +if (!$sto) { + plan skip_all => "Can't create temporary test database: $@"; + exit 0; +} + +use File::Temp; +my %mogroot; +$mogroot{1} = File::Temp::tempdir( CLEANUP => 1 ); +$mogroot{2} = File::Temp::tempdir( CLEANUP => 1 ); +my $dev2host = { 1 => 1, 2 => 2, }; +foreach (sort { $a <=> $b } keys %$dev2host) { + my $root = $mogroot{$dev2host->{$_}}; + mkdir("$root/dev$_") or die "Failed to create dev$_ dir: $!"; +} + +my $ms1 = create_mogstored("127.0.1.1", $mogroot{1}); +ok($ms1, "got mogstored1"); +my $ms2 = create_mogstored("127.0.1.2", $mogroot{2}); +ok($ms1, "got mogstored2"); + +try_for(30, sub { + print "Waiting on usage...\n"; + -e "$mogroot{1}/dev1/usage" && -e "$mogroot{2}/dev2/usage"; +}); + +my $tmptrack = create_temp_tracker($sto); +ok($tmptrack); + +my $admin = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001'); +$admin or die "failed to create admin socket: $!"; +my $moga = MogileFS::Admin->new(hosts => [ "127.0.0.1:7001" ]); +my $mogc = MogileFS::Client->new( + domain => "testdom", + hosts => [ "127.0.0.1:7001" ], + ); +my $be = $mogc->{backend}; # gross, reaching inside of MogileFS::Client + +# test some basic commands to backend 
+ok($tmptrack->mogadm("domain", "add", "testdom"), "created test domain"); +ok($tmptrack->mogadm("class", "add", "testdom", "changer", "--mindevcount=2", "--hashtype=MD5"), "created changer class in testdom with hashtype=MD5"); + +ok($tmptrack->mogadm("host", "add", "hostA", "--ip=127.0.1.1", "--status=alive"), "created hostA"); +ok($tmptrack->mogadm("host", "add", "hostB", "--ip=127.0.1.2", "--status=alive"), "created hostB"); + +ok($tmptrack->mogadm("device", "add", "hostA", 1), "created dev1 on hostA"); +ok($tmptrack->mogadm("device", "add", "hostB", 2), "created dev2 on hostB"); + +sub wait_for_monitor { + my $be = shift; + my $was = $be->{timeout}; # can't use local on phash :( + $be->{timeout} = 10; + ok($be->do_request("clear_cache", {}), "waited for monitor") + or die "Failed to wait for monitor"; + ok($be->do_request("clear_cache", {}), "waited for monitor") + or die "Failed to wait for monitor"; + $be->{timeout} = $was; +} + +wait_for_monitor($be); +want($admin, 0, "replicate"); + +my ($req, $rv, %opts, @paths, @fsck_log); +my $ua = LWP::UserAgent->new; + +use Data::Dumper; +use Digest::MD5 qw/md5_hex/; + +my $key = "foo"; +{ + %opts = ( domain => "testdom", class => "changer", key => $key ); + $rv = $be->do_request("create_open", \%opts); + %opts = %$rv; + ok($rv && $rv->{path}, "create_open succeeded"); + $req = HTTP::Request->new(PUT => $rv->{path}); + $req->content("blah"); + $rv = $ua->request($req); + ok($rv->is_success, "PUT successful"); + $opts{key} = $key; + $opts{domain} = "testdom"; + $opts{checksum} = "MD5:".md5_hex('blah'); + $opts{checksumverify} = 1; + $rv = $be->do_request("create_close", \%opts); + ok($rv, "checksum verified successfully"); + ok($sto->get_checksum($opts{fid}), "checksum saved"); + ok($mogc->file_info($key), "file_info($key) is sane"); +} + +# disable MD5 checksums in "changer" class +{ + %opts = ( domain => "testdom", class => "changer", + hashtype => "NONE", mindevcount => 2); + ok($be->do_request("update_class", 
\%opts), "update class"); + wait_for_monitor($be); +} + +# replicate should work even if we have, but don't need a checksum anymore +{ + want($admin, 1, "replicate"); + + # wait for replicate to recreate checksum + try_for(30, sub { + @paths = $mogc->get_paths($key); + scalar(@paths) != 1; + }); + is(scalar(@paths), 2, "replicated successfully"); + want($admin, 0, "replicate"); +} + +# switch to SHA-1 checksums in "changer" class +{ + %opts = ( domain => "testdom", class => "changer", + hashtype => "SHA-1", mindevcount => 2); + ok($be->do_request("update_class", \%opts), "update class"); + wait_for_monitor($be); +} + +{ + ok($tmptrack->mogadm("fsck", "stop"), "stop fsck"); + ok($tmptrack->mogadm("fsck", "clearlog"), "clear fsck log"); + ok($tmptrack->mogadm("fsck", "reset"), "reset fsck"); + ok($tmptrack->mogadm("fsck", "start"), "started fsck"); + + try_for(30, sub { @fsck_log = $sto->fsck_log_rows; }); + is($fsck_log[0]->{evcode}, "BALG", "bad checksum algorithm logged"); +} + +done_testing(); diff --git a/t/60-fsck.t b/t/60-fsck.t new file mode 100644 index 00000000..66130ac3 --- /dev/null +++ b/t/60-fsck.t @@ -0,0 +1,592 @@ +# -*-perl-*- +# some of the comments match the comments in MogileFS/Worker/Fsck.pm +# _exactly_ for reference purposes +use strict; +use warnings; +use Test::More; +use FindBin qw($Bin); +use Time::HiRes qw(sleep); +use MogileFS::Server; +use MogileFS::Test; +use HTTP::Request; +find_mogclient_or_skip(); +use MogileFS::Admin; + +my $sto = eval { temp_store(); }; +if (!$sto) { + plan skip_all => "Can't create temporary test database: $@"; + exit 0; +} + +use File::Temp; +my %mogroot; +$mogroot{1} = File::Temp::tempdir( CLEANUP => 1 ); +$mogroot{2} = File::Temp::tempdir( CLEANUP => 1 ); +$mogroot{3} = File::Temp::tempdir( CLEANUP => 1 ); +my $dev2host = { 1 => 1, 2 => 2, 3 => 2 }; +foreach (sort { $a <=> $b } keys %$dev2host) { + my $root = $mogroot{$dev2host->{$_}}; + mkdir("$root/dev$_") or die "Failed to create dev$_ dir: $!"; +} + +my 
$ms1 = create_mogstored("127.0.1.1", $mogroot{1}); +ok($ms1, "got mogstored1"); +my $ms2 = create_mogstored("127.0.1.2", $mogroot{2}); +ok($ms2, "got mogstored2"); + +while (! -e "$mogroot{1}/dev1/usage" || + ! -e "$mogroot{2}/dev2/usage" || + ! -e "$mogroot{2}/dev3/usage") { + print "Waiting on usage...\n"; + sleep(.25); +} + +my $tmptrack = create_temp_tracker($sto); +ok($tmptrack); + +my $admin = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001'); +$admin or die "failed to create admin socket: $!"; +my $moga = MogileFS::Admin->new(hosts => [ "127.0.0.1:7001" ]); +my $mogc = MogileFS::Client->new( + domain => "testdom", + hosts => [ "127.0.0.1:7001" ], + ); +my $be = $mogc->{backend}; # gross, reaching inside of MogileFS::Client + +# test some basic commands to backend +ok($tmptrack->mogadm("domain", "add", "testdom"), "created test domain"); +ok($tmptrack->mogadm("class", "add", "testdom", "2copies", "--mindevcount=2"), "created 2copies class in testdom"); +ok($tmptrack->mogadm("class", "add", "testdom", "1copy", "--mindevcount=1"), "created 1copy class in testdom"); + +ok($tmptrack->mogadm("host", "add", "hostA", "--ip=127.0.1.1", "--status=alive"), "created hostA"); +ok($tmptrack->mogadm("host", "add", "hostB", "--ip=127.0.1.2", "--status=alive"), "created hostB"); + +ok($tmptrack->mogadm("device", "add", "hostA", 1), "created dev1 on hostA"); +ok($tmptrack->mogadm("device", "add", "hostB", 2), "created dev2 on hostB"); + +sub wait_for_monitor { + my $be = shift; + my $was = $be->{timeout}; # can't use local on phash :( + $be->{timeout} = 10; + ok($be->do_request("clear_cache", {}), "waited for monitor") + or die "Failed to wait for monitor"; + ok($be->do_request("clear_cache", {}), "waited for monitor") + or die "Failed to wait for monitor"; + $be->{timeout} = $was; +} + +sub watcher_wait_for { + my ($re, $cb) = @_; + my $line; + my $watcher = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001'); + $watcher or die "failed to create watcher socket: $!"; + 
$watcher->syswrite("!watch\r\n"); + + $cb->(); # usually this is to start fsck + + do { + $line = $watcher->getline; + } until ($line =~ /$re/); + + $watcher->close; +} + +sub wait_for_empty_queue { + my ($table, $dbh) = @_; + my $limit = 600; + my $delay = 0.1; + my $i = $limit; + my $count; + while ($i > 0) { + $count = $dbh->selectrow_array("SELECT COUNT(*) from $table"); + return if ($count == 0); + sleep $delay; + } + my $time = $delay * $limit; + die "$table is not empty after ${time}s!"; +} + +sub full_fsck { + my ($tmptrack, $dbh) = @_; + + # this should help prevent race conditions: + wait_for_empty_queue("file_to_queue", $dbh); + + ok($tmptrack->mogadm("fsck", "stop"), "stop fsck"); + ok($tmptrack->mogadm("fsck", "clearlog"), "clear fsck log"); + ok($tmptrack->mogadm("fsck", "reset"), "reset fsck"); + ok($tmptrack->mogadm("fsck", "start"), "started fsck"); +} + +sub unblock_fsck_queue { + my ($sto, $expect) = @_; + my $now = $sto->unix_timestamp; + my $upd = sub { $sto->dbh->do("UPDATE file_to_queue SET nexttry = $now") }; + is($sto->retry_on_deadlock($upd), $expect, "unblocked fsck queue"); +} + +sub get_worker_pids { + my ($admin, $worker) = @_; + + ok($admin->syswrite("!jobs\n"), "requested jobs"); + my @pids; + + while (my $line = $admin->getline) { + last if $line =~ /^\.\r?\n/; + + if ($line =~ /^$worker pids (\d[\d+\s]*)\r?\n/) { + @pids = split(/\s+/, $1); + } + } + ok(scalar(@pids), "got pid(s) of $worker"); + + return @pids; +} + +sub shutdown_worker { + my ($admin, $worker) = @_; + + watcher_wait_for(qr/Job $worker has only 0/, sub { + $admin->syswrite("!to $worker :shutdown\r\n"); + like($admin->getline, qr/^Message sent to 1 children/, "tracker sent message to child"); + like($admin->getline, qr/^\./, "tracker ended transmission"); + }); +} + +wait_for_monitor($be); + +my ($req, $rv, %opts, @paths, @fsck_log, $info); +my $ua = LWP::UserAgent->new; +my $key = "testkey"; +my $dbh = $sto->dbh; + +use Data::Dumper; + +# upload a file and wait for 
replica to appear +{ + my $fh = $mogc->new_file($key, "1copy"); + print $fh "hello\n"; + ok(close($fh), "closed file"); +} + +# first obvious fucked-up case: no devids even presumed to exist. +{ + $info = $mogc->file_info($key); + is($info->{devcount}, 1, "ensure devcount is correct at start"); + + # ensure repl queue is empty before destroying file_on + wait_for_empty_queue("file_to_replicate", $dbh); + + my @devids = $sto->fid_devids($info->{fid}); + is(scalar(@devids), 1, "devids retrieved"); + is($sto->remove_fidid_from_devid($info->{fid}, $devids[0]), 1, + "delete $key from file_on table"); + + full_fsck($tmptrack, $dbh); + do { + @fsck_log = $sto->fsck_log_rows; + } while (scalar(@fsck_log) < 3 && sleep(0.1)); + + wait_for_empty_queue("file_to_queue", $dbh); + @fsck_log = $sto->fsck_log_rows; + + my $nopa = $fsck_log[0]; + is($nopa->{evcode}, "NOPA", "evcode for no paths logged"); + + # entering "desperate" mode + my $srch = $fsck_log[1]; + is($srch->{evcode}, "SRCH", "evcode for start search logged"); + + # wow, we actually found it! 
+ my $fond = $fsck_log[2]; + is($fond->{evcode}, "FOND", "evcode for start search logged"); + + $info = $mogc->file_info($key); + is($info->{devcount}, 1, "ensure devcount is correct at fsck end"); + @paths = $mogc->get_paths($key); + is(scalar(@paths), 1, "get_paths returns correctly at fsck end"); +} + +# update class to require 2copies and have fsck fix it +{ + @paths = $mogc->get_paths($key); + is(scalar(@paths), 1, "only one path exists before fsck"); + + # _NOT_ using "updateclass" command since that enqueues for replication + my $fid = MogileFS::FID->new($info->{fid}); + my $classid_2copies = $sto->get_classid_by_name($fid->dmid, "2copies"); + is($fid->update_class(classid => $classid_2copies), 1, "classid updated"); + + full_fsck($tmptrack, $dbh); + + do { + @paths = $mogc->get_paths($key); + } while (scalar(@paths) == 1 and sleep(0.1)); + is(scalar(@paths), 2, "replicated from fsck"); + + $info = $mogc->file_info($key); + is($info->{devcount}, 2, "ensure devcount is updated by replicate"); + + do { + @fsck_log = $sto->fsck_log_rows; + } while (scalar(@fsck_log) == 0 and sleep(0.1)); + + my $povi = $fsck_log[0]; + is($povi->{evcode}, "POVI", "policy violation logged by fsck"); + + my $repl = $fsck_log[1]; + is($repl->{evcode}, "REPL", "replication request logged by fsck"); +} + +# wrong devcount in file column, but otherwise everything is OK +{ + foreach my $wrong_devcount (13, 0, 1) { + my $upd = sub { + $dbh->do("UPDATE file SET devcount = ? 
WHERE fid = ?", + undef, $wrong_devcount, $info->{fid}); + }; + is($sto->retry_on_deadlock($upd), 1, "set improper devcount"); + + $info = $mogc->file_info($key); + is($info->{devcount}, $wrong_devcount, "devcount is set to $wrong_devcount"); + + full_fsck($tmptrack, $dbh); + + do { + $info = $mogc->file_info($key); + } while ($info->{devcount} == $wrong_devcount && sleep(0.1)); + is($info->{devcount}, 2, "devcount is corrected by fsck"); + + wait_for_empty_queue("file_to_queue", $dbh); + @fsck_log = $sto->fsck_log_rows; + is($fsck_log[0]->{evcode}, "BCNT", "bad count logged"); + } +} + +# nuke a file from disk but keep the file_on row +{ + @paths = $mogc->get_paths($key); + is(scalar(@paths), 2, "two paths returned from get_paths"); + $req = HTTP::Request->new(DELETE => $paths[0]); + $rv = $ua->request($req); + ok($rv->is_success, "DELETE successful"); + + full_fsck($tmptrack, $dbh); + do { + @fsck_log = $sto->fsck_log_rows; + } while (scalar(@fsck_log) < 2 && sleep(0.1)); + + my $miss = $fsck_log[0]; + is($miss->{evcode}, "MISS", "missing file logged by fsck"); + + my $repl = $fsck_log[1]; + is($repl->{evcode}, "REPL", "replication request logged by fsck"); + + wait_for_empty_queue("file_to_replicate", $dbh); + + @paths = $mogc->get_paths($key); + is(scalar(@paths), 2, "two paths returned from get_paths"); + foreach my $path (@paths) { + $rv = $ua->get($path); + is($rv->content, "hello\n", "GET successful on restored path"); + } +} + +# change the length of a file from disk and have fsck correct it +{ + @paths = $mogc->get_paths($key); + is(scalar(@paths), 2, "two paths returned from get_paths"); + $req = HTTP::Request->new(PUT => $paths[0]); + $req->content("hello\r\n"); + $rv = $ua->request($req); + ok($rv->is_success, "PUT successful"); + + full_fsck($tmptrack, $dbh); + do { + @fsck_log = $sto->fsck_log_rows; + } while (scalar(@fsck_log) < 2 && sleep(0.1)); + + my $blen = $fsck_log[0]; + is($blen->{evcode}, "BLEN", "missing file logged by fsck"); + + my $repl 
= $fsck_log[1]; + is($repl->{evcode}, "REPL", "replication request logged by fsck"); + + wait_for_empty_queue("file_to_replicate", $dbh); + + @paths = $mogc->get_paths($key); + is(scalar(@paths), 2, "two paths returned from get_paths"); + foreach my $path (@paths) { + $rv = $ua->get($path); + is($rv->content, "hello\n", "GET successful on restored path"); + } +} + +# nuke a file completely and irreparably +{ + @paths = $mogc->get_paths($key); + is(scalar(@paths), 2, "two paths returned from get_paths"); + foreach my $path (@paths) { + $req = HTTP::Request->new(DELETE => $path); + $rv = $ua->request($req); + ok($rv->is_success, "DELETE successful"); + } + + full_fsck($tmptrack, $dbh); + do { + @fsck_log = $sto->fsck_log_rows; + } while (scalar(@fsck_log) < 4 && sleep(0.1)); + + is($fsck_log[0]->{evcode}, "MISS", "missing file logged for first path"); + is($fsck_log[1]->{evcode}, "MISS", "missing file logged for second path"); + is($fsck_log[2]->{evcode}, "SRCH", "desperate search attempt logged"); + is($fsck_log[3]->{evcode}, "GONE", "inability to fix FID logged"); + + wait_for_empty_queue("file_to_queue", $dbh); + $info = $mogc->file_info($key); + + is($info->{devcount}, 0, "devcount updated to zero"); + @paths = $mogc->get_paths($key); + is(scalar(@paths), 0, "get_paths returns nothing"); +} + +# upload a file and wait for replica to appear +{ + my $fh = $mogc->new_file($key, "2copies"); + print $fh "hello\n"; + ok(close($fh), "closed file"); + + do { + $info = $mogc->file_info($key); + } while ($info->{devcount} < 2); + is($info->{devcount}, 2, "ensure devcount is correct at start"); +} + +# stall fsck with a non-responsive host +{ + is(kill("STOP", $ms1->pid), 1, "send SIGSTOP to mogstored1"); + wait_for_monitor($be) foreach (1..3); + + shutdown_worker($admin, "job_master"); + shutdown_worker($admin, "fsck"); + + $sto->retry_on_deadlock(sub { $sto->dbh->do("DELETE FROM file_to_queue") }); + watcher_wait_for(qr/\[fsck\(\d+\)] Connectivity problem reaching 
device/, sub { + full_fsck($tmptrack, $dbh); + }); + is($sto->file_queue_length(MogileFS::Config::FSCK_QUEUE), 1, "fsck queue still blocked"); +} + +# resume fsck when host is responsive again +{ + is(kill("CONT", $ms1->pid), 1, "send SIGCONT to mogstored1"); + wait_for_monitor($be); + + shutdown_worker($admin, "fsck"); + + # force fsck to wakeup and do work again + unblock_fsck_queue($sto, 1); + + ok($admin->syswrite("!to fsck :wake_up\n"), "force fsck to wake up"); + ok($admin->getline, "got wakeup response 1"); + ok($admin->getline, "got wakeup response 2"); + + foreach my $i (1..30) { + last if $sto->file_queue_length(MogileFS::Config::FSCK_QUEUE) == 0; + + sleep 1; + } + + is($sto->file_queue_length(MogileFS::Config::FSCK_QUEUE), 0, "fsck queue emptied"); +} + +# upload a file and wait for replica to appear +{ + my $fh = $mogc->new_file($key, "2copies"); + print $fh "hello\n"; + ok(close($fh), "closed file"); + + do { + $info = $mogc->file_info($key); + } while ($info->{devcount} < 2); + is($info->{devcount}, 2, "ensure devcount is correct at start"); +} + +# stall fsck with a non-responsive host +# resume fsck when host is responsive again +{ + is(kill("STOP", $ms1->pid), 1, "send SIGSTOP to mogstored1 to stall"); + wait_for_monitor($be); + + watcher_wait_for(qr/\[fsck\(\d+\)] Connectivity problem reaching device/, sub { + full_fsck($tmptrack, $dbh); + }); + is($sto->file_queue_length(MogileFS::Config::FSCK_QUEUE), 1, "fsck queue still blocked"); + + is(kill("CONT", $ms1->pid), 1, "send SIGCONT to mogstored1 to resume"); + wait_for_monitor($be); + + # force fsck to wakeup and do work again + unblock_fsck_queue($sto, 1); + ok($admin->syswrite("!to fsck :wake_up\n"), "force fsck to wake up"); + ok($admin->getline, "got wakeup response 1"); + ok($admin->getline, "got wakeup response 2"); + + foreach my $i (1..30) { + last if $sto->file_queue_length(MogileFS::Config::FSCK_QUEUE) == 0; + + sleep 1; + } + + is($sto->file_queue_length(MogileFS::Config::FSCK_QUEUE), 
0, "fsck queue emptied"); +} + +# cleanup over-replicated file +{ + $info = $mogc->file_info($key); + is($info->{devcount}, 2, "ensure devcount is correct at start"); + + # _NOT_ using "updateclass" command since that enqueues for replication + my $fid = MogileFS::FID->new($info->{fid}); + my $classid_1copy = $sto->get_classid_by_name($fid->dmid, "1copy"); + is($fid->update_class(classid => $classid_1copy), 1, "classid updated"); + + full_fsck($tmptrack, $dbh); + + sleep(1) while $mogc->file_info($key)->{devcount} == 2; + is($mogc->file_info($key)->{devcount}, 1, "too-happy file cleaned up"); + + @fsck_log = $sto->fsck_log_rows; + is($fsck_log[0]->{evcode}, "POVI", "policy violation logged"); + + # replicator is requested to delete too-happy file + is($fsck_log[1]->{evcode}, "REPL", "replication request logged"); +} + +# kill a device and replace it with another, without help from reaper +# this test may become impossible if monitor + reaper are merged... +{ + ok($mogc->update_class($key, "2copies"), "request 2 replicas again"); + do { + $info = $mogc->file_info($key); + } while ($info->{devcount} < 2); + is($info->{devcount}, 2, "ensure devcount is correct at start"); + wait_for_empty_queue("file_to_replicate", $dbh); + + my (@reaper_pids) = get_worker_pids($admin, "reaper"); + is(scalar(@reaper_pids), 1, "only one reaper pid"); + my $reaper_pid = $reaper_pids[0]; + ok($reaper_pid, "got pid of reaper"); + + # reaper's watchdog is 240s, and it wakes up in 5s intervals right now + # so we should be safe from watchdog for now... 
+ ok(kill("STOP", $reaper_pid), "stopped reaper from running"); + + ok($tmptrack->mogadm("device", "mark", "hostB", 2, "dead"), "mark dev2 as dead"); + ok($tmptrack->mogadm("device", "add", "hostB", 3), "created dev3 on hostB"); + wait_for_monitor($be); + + full_fsck($tmptrack, $dbh); + + sleep 1 while MogileFS::Config->server_setting("fsck_host"); + + foreach my $i (1..30) { + last if $sto->file_queue_length(MogileFS::Config::FSCK_QUEUE) == 0; + sleep 1; + } + is($sto->file_queue_length(MogileFS::Config::FSCK_QUEUE), 0, "fsck queue empty"); + + # fsck should've corrected what reaper missed + @fsck_log = $sto->fsck_log_rows; + is(scalar(@fsck_log), 1, "fsck log populated"); + is($fsck_log[0]->{evcode}, "REPL", "replication enqueued"); + + ok(kill("CONT", $reaper_pid), "resumed reaper"); +} + +{ + ok($tmptrack->mogadm("fsck", "stop"), "stop fsck"); + + foreach my $i (1..10) { + my $fh = $mogc->new_file("k$i", "1copy"); + print $fh "$i\n"; + ok(close($fh), "closed file ($i)"); + } + $info = $mogc->file_info("k10"); + + ok($tmptrack->mogadm("settings", "set", "queue_rate_for_fsck", 1), "set queue_rate_for_fsck to 1"); + ok($tmptrack->mogadm("settings", "set", "queue_size_for_fsck", 1), "set queue_size_for_fsck to 1"); + + wait_for_monitor($be) foreach (1..3); + + shutdown_worker($admin, "job_master"); + shutdown_worker($admin, "fsck"); + + ok($tmptrack->mogadm("fsck", "clearlog"), "clear fsck log"); + ok($tmptrack->mogadm("fsck", "reset"), "reset fsck"); + $sto->retry_on_deadlock(sub { $sto->dbh->do("DELETE FROM file_to_queue") }); + ok($tmptrack->mogadm("fsck", "start"), "start fsck with slow queue rate"); + + ok(MogileFS::Config->server_setting("fsck_host"), "fsck_host set"); + is(MogileFS::Config->server_setting("fsck_fid_at_end"), $info->{fid}, "fsck_fid_at_end matches"); + + my $highest = undef; + foreach my $i (1..100) { + $highest = MogileFS::Config->server_setting("fsck_highest_fid_checked"); + last if defined $highest; + sleep 0.1; + } + ok(defined($highest), 
"fsck_highest_fid_checked is set"); + like($highest, qr/\A\d+\z/, "fsck_highest_fid_checked is a digit"); + isnt($highest, $info->{fid}, "fsck is not running too fast"); + + # wait for something to get fscked + foreach my $i (1..100) { + last if MogileFS::Config->server_setting("fsck_highest_fid_checked") != $highest; + sleep 0.1; + } + + my $old_highest = $highest; + $highest = MogileFS::Config->server_setting("fsck_highest_fid_checked"); + isnt($highest, $old_highest, "moved to next FID"); + + ok($tmptrack->mogadm("fsck", "stop"), "stop fsck"); + ok(! MogileFS::Config->server_setting("fsck_host"), "fsck_host unset"); + is(MogileFS::Config->server_setting("fsck_fid_at_end"), $info->{fid}, "fsck_fid_at_end matches"); + + # resume paused fsck + ok($tmptrack->mogadm("fsck", "start"), "restart fsck"); + $highest = MogileFS::Config->server_setting("fsck_highest_fid_checked"); + cmp_ok($highest, '>=', $old_highest, "fsck resumed without resetting fsck_highest_fid_checked"); + + # wait for something to get fscked + foreach my $i (1..200) { + last if MogileFS::Config->server_setting("fsck_highest_fid_checked") != $highest; + sleep 0.1; + } + + $highest = MogileFS::Config->server_setting("fsck_highest_fid_checked"); + cmp_ok($highest, '>', $old_highest, "fsck continued to higher FID"); +} + +# upload new files, but ensure fsck does NOT reach them +{ + my $last_fid = $sto->max_fidid; + + foreach my $i (1..10) { + my $fh = $mogc->new_file("z$i", "1copy"); + print $fh "$i\n"; + ok(close($fh), "closed file (z$i)"); + } + + # crank up fsck speed again + ok($tmptrack->mogadm("settings", "set", "queue_rate_for_fsck", 100), "set queue_rate_for_fsck to 100"); + ok($tmptrack->mogadm("settings", "set", "queue_size_for_fsck", 100), "set queue_size_for_fsck to 100"); + + sleep 0.1 while MogileFS::Config->server_setting("fsck_host"); + + my $highest = MogileFS::Config->server_setting("fsck_highest_fid_checked"); + is($highest, $last_fid, "fsck didn't advance beyond what we started 
with"); +} + +done_testing(); diff --git a/t/70-reaper.t b/t/70-reaper.t new file mode 100644 index 00000000..0fdba930 --- /dev/null +++ b/t/70-reaper.t @@ -0,0 +1,95 @@ +# -*-perl-*- +use strict; +use warnings; +use Test::More; +use FindBin qw($Bin); +use Time::HiRes qw(sleep); +use MogileFS::Server; +use MogileFS::Test; +find_mogclient_or_skip(); +use MogileFS::Admin; +use File::Temp; + +my $sto = eval { temp_store(); }; +if (!$sto) { + plan skip_all => "Can't create temporary test database: $@"; + exit 0; +} + +my %mogroot; +$mogroot{1} = File::Temp::tempdir( CLEANUP => 1 ); +my $dev2host = { 1 => 1, 2 => 1, 3 => 1 }; +foreach (sort { $a <=> $b } keys %$dev2host) { + my $root = $mogroot{$dev2host->{$_}}; + mkdir("$root/dev$_") or die "Failed to create dev$_ dir: $!"; +} + +my $ms1 = create_mogstored("127.0.1.1", $mogroot{1}); +ok($ms1, "got mogstored"); + +while (! -e "$mogroot{1}/dev1/usage" || + ! -e "$mogroot{1}/dev2/usage" || + ! -e "$mogroot{1}/dev3/usage") { + print "Waiting on usage...\n"; + sleep(.25); +} + +my $tmptrack = create_temp_tracker($sto); +ok($tmptrack); + +my $admin = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001'); +$admin or die "failed to create admin socket: $!"; +my $moga = MogileFS::Admin->new(hosts => [ "127.0.0.1:7001" ]); +my $mogc = MogileFS::Client->new( + domain => "testdom", + hosts => [ "127.0.0.1:7001" ], + ); + +ok($tmptrack->mogadm("host", "add", "hostA", "--ip=127.0.1.1", "--status=alive"), "created hostA"); +ok($tmptrack->mogadm("device", "add", "hostA", 1), "created dev1 on hostA"); +ok($tmptrack->mogadm("device", "add", "hostA", 2), "created dev2 on hostA"); + +ok($tmptrack->mogadm("domain", "add", "testdom"), "created test domain"); +ok($tmptrack->mogadm("class", "add", "testdom", "2copies", "--mindevcount=2"), "created 2copies class in testdom"); +ok($tmptrack->mogadm("settings", "set", "queue_rate_for_reaper", 123), "set queue_rate_for_reaper"); + +# create one sample file with 2 copies +my $fh = 
$mogc->new_file("file1", "2copies"); +ok($fh, "got filehandle"); +ok(close($fh), "closed file"); + +my $tries; +my @urls; + +# wait for it to replicate +for ($tries = 100; $tries--; ) { + @urls = $mogc->get_paths("file1"); + last if scalar(@urls) == 2; + sleep .1; +} + +is(scalar(@urls), 2, "replicated to 2 paths"); +my $orig_urls = join("\n", sort(@urls)); + +# add a new device and mark an existing device as dead +ok($tmptrack->mogadm("device", "add", "hostA", 3), "created dev3 on hostA"); +ok($tmptrack->mogadm("device", "mark", "hostA", 2, "dead"), "mark dev2 as dead"); + +# reaper should notice the dead device in 5-10s +for ($tries = 100; $tries--; ) { + @urls = $mogc->get_paths("file1"); + last if scalar(grep(m{/dev2/}, @urls)) == 0; + sleep 0.1; +} +is(scalar(grep(m{/dev2/}, @urls)), 0, "file1 no longer references dead dev2"); + +# replicator should replicate the file within 15-30s +for ($tries = 300; $tries--; ) { + @urls = sort($mogc->get_paths("file1")); + last if (scalar(@urls) == 2) && (join("\n", @urls) ne $orig_urls); + sleep 0.1; +} +is(grep(m{/dev3/}, @urls), 1, "file1 got copied to dev3"); +is(scalar(@urls), 2, "we have 2 paths for file1 again"); + +done_testing(); diff --git a/t/80-job_master.t b/t/80-job_master.t new file mode 100644 index 00000000..30055dbf --- /dev/null +++ b/t/80-job_master.t @@ -0,0 +1,61 @@ +# -*-perl-*- +use strict; +use warnings; +use Test::More; +use FindBin qw($Bin); +use MogileFS::Server; +use MogileFS::Test; + +my @jm_jobs = qw(fsck delete replicate); +my $jobs; + +my $sto = eval { temp_store(); }; +if (!$sto) { + plan skip_all => "Can't create temporary test database: $@"; + exit 0; +} + +my $tmptrack = create_temp_tracker($sto, ["--no-job_master"]); +my $admin = IO::Socket::INET->new(PeerAddr => '127.0.0.1:7001'); +$admin or die "failed to create admin socket: $!"; + +sub jobs { + my ($admin) = @_; + my %ret; + + syswrite($admin, "!jobs\r\n"); + MogileFS::Util::wait_for_readability(fileno($admin), 10); + while (1) { + 
my $line = <$admin>; + $line =~ s/\r\n//; + last if $line eq "."; + $line =~ /^(\w+ \w+)\s*(.*)$/ or die "Failed to parse $line\n"; + $ret{$1} = $2; + } + return \%ret; +} + +ok(try_for(30, sub { jobs($admin)->{"queryworker count"} }), "wait for queryworker"); + +$jobs = jobs($admin); +foreach my $job (@jm_jobs) { + ok(!$jobs->{"$job count"}, "no $job workers"); +} + +# enable job master +want($admin, 1, "job_master"); + +ok(try_for(30, sub { jobs($admin)->{"queryworker count"} }), "wait for queryworker"); + +foreach my $job (@jm_jobs) { + ok(try_for(30, sub { jobs($admin)->{"$job count"} }), "wait for $job"); +} + +# disable job_master again +want($admin, 0, "job_master"); + +foreach my $job (@jm_jobs) { + ok(try_for(30, sub { !jobs($admin)->{"$job count"} }), "wait for $job to die"); +} + +done_testing(); diff --git a/t/checksum.t b/t/checksum.t new file mode 100644 index 00000000..9703c62a --- /dev/null +++ b/t/checksum.t @@ -0,0 +1,36 @@ +# -*-perl-*- + +use strict; +use warnings; +use Test::More; + +use MogileFS::Server; +use MogileFS::Util qw(error_code); +use MogileFS::Test; +use MogileFS::Checksum; +use Digest::MD5 qw(md5 md5_hex); + +my $sto = eval { temp_store(); }; +if (!$sto) { + plan skip_all => "Can't create temporary test database: $@"; + exit 0; +} + +$sto->set_checksum(6, 1, md5("asdf")); +my $hash = $sto->get_checksum(6); +my $csum = MogileFS::Checksum->new($hash); +is(md5_hex("asdf"), $csum->hexdigest); +is("MD5", $csum->hashname); + +my $zero = "MD5:d41d8cd98f00b204e9800998ecf8427e"; +$csum = MogileFS::Checksum->from_string(6, $zero); +is("MogileFS::Checksum", ref($csum), "is a ref"); +is("d41d8cd98f00b204e9800998ecf8427e", $csum->hexdigest, "hex matches"); +is(1, $csum->save, "save successfully"); +$hash = $sto->get_checksum(6); +my $reloaded = MogileFS::Checksum->new($hash); +is("d41d8cd98f00b204e9800998ecf8427e", $reloaded->hexdigest, "hex matches"); +my $fid_checksum = MogileFS::FID->new(6)->checksum; +is_deeply($fid_checksum, $csum, 
"MogileFS::FID->checksum works"); + +done_testing(); diff --git a/t/domains-classes.t b/t/domains-classes.t deleted file mode 100644 index dda580b4..00000000 --- a/t/domains-classes.t +++ /dev/null @@ -1,78 +0,0 @@ -# -*-perl-*- - -use strict; -use warnings; -use Test::More; -use FindBin qw($Bin); - -use MogileFS::Server; -use MogileFS::Util qw(error_code); -use MogileFS::Test; - -my $sto = eval { temp_store(); }; -if ($sto) { - plan tests => 26; -} else { - plan skip_all => "Can't create temporary test database: $@"; - exit 0; -} - -is(scalar MogileFS::Domain->domains, 0, "no domains at present"); - -my $dom = MogileFS::Domain->create("foo"); -ok($dom, "created a domain"); - -my $dup = eval { MogileFS::Domain->create("foo") }; -ok(!$dup, "didn't create it"); -is(error_code($@), "dup", "because it was a duplicate domain"); - -is(scalar MogileFS::Domain->domains, 1, "one domain now"); -$dom->delete; -is(scalar MogileFS::Domain->domains, 0, "back to zero domains"); - -$dom = MogileFS::Domain->create("foo"); -ok($dom, "created foo domain again"); -is(scalar MogileFS::Domain->domains, 1, "back to one domain"); - -{ - local $Mgd::_T_DOM_HAS_FILES = 1; - ok(!eval{ $dom->delete; }, "failed to delete domain"); - is(error_code($@), "has_files", "because it had files"); -} - -my @classes = $dom->classes; -is(scalar @classes, 1, "one class in domain") - or die; -is($classes[0]->name, "default", "is the default class"); -is($classes[0]->classid, 0, ".. of classid 0"); -ok(defined $classes[0]->classid, ".. 
which is defined"); - -my $cla = $dom->create_class("classA"); -ok($cla, "created classA"); -is(scalar($dom->classes), 2, "two classes now"); - -my $clb = $dom->create_class("classB"); -ok($clb, "created classB"); -is(scalar($dom->classes), 3, "three classes now"); - -{ - my $dup = eval { $dom->create_class("classA") }; # can't create this again - ok(!$dup, "didn't create dup of A"); - is(error_code($@), "dup", "because it was a dup"); -} - -ok($clb->set_name("classB2"), "renamed classB to B2"); -is($clb->name, "classB2", "and it renamed"); - -ok(!eval { $clb->set_name("classA") }, "failed to rename B2 to classA"); -is(error_code($@), "dup", "because it was a dup"); - -ok($clb->delete, "deleted class"); -is(scalar($dom->classes), 2, "two classes now"); - - - - - - - diff --git a/t/fid-stat.t b/t/fid-stat.t index da603151..89ee7617 100644 --- a/t/fid-stat.t +++ b/t/fid-stat.t @@ -8,8 +8,6 @@ use FindBin qw($Bin); use Mogstored::FIDStatter; use File::Temp qw(tempdir); -plan tests => 11; - my $td = tempdir(CLEANUP => 1); ok($td, "got tempdir"); ok(-d $td, "tempdir is writable"); @@ -119,5 +117,4 @@ sub make_file { close($fh) or die; } - - +done_testing(); diff --git a/t/hosts-devices.t b/t/hosts-devices.t deleted file mode 100644 index 5066985d..00000000 --- a/t/hosts-devices.t +++ /dev/null @@ -1,50 +0,0 @@ -# -*-perl-*- - -use strict; -use warnings; -use Test::More; -use FindBin qw($Bin); - -use MogileFS::Server; -use MogileFS::Util qw(error_code); -use MogileFS::Test; - -my $sto = eval { temp_store(); }; -if ($sto) { - plan tests => 18; -} else { - plan skip_all => "Can't create temporary test database: $@"; - exit 0; -} - -is(scalar MogileFS::Host->hosts, 0, "no hosts at present"); -is(scalar MogileFS::Device->devices, 0, "no devices at present"); - -my $ha = MogileFS::Host->create("a", "10.0.0.1"); -ok($ha, "made hostA"); -my $hb = MogileFS::Host->create("b", "10.0.0.2"); -ok($hb, "made hostB"); -ok(!eval{ MogileFS::Host->create("b", "10.0.0.3") }, "can't dup 
hostB's name"); -is(error_code($@), "dup", "yup, was a dup"); -ok(!eval{ MogileFS::Host->create("c", "10.0.0.2") }, "can't dup hostB's IP"); -is(error_code($@), "dup", "yup, was a dup"); - -ok($hb->set_ip("10.0.0.4"), "set IP"); -is($hb->ip, "10.0.0.4", "IP matches"); -ok(!eval{$hb->set_ip("10.0.0.1")}, "IP's taken"); -is(error_code($@), "dup", "yup, was a dup"); - -is(scalar MogileFS::Host->hosts, 2, "2 hosts now"); -ok($ha->delete, "deleted hostA"); -is(scalar MogileFS::Host->hosts, 1, "1 host now"); - -my $da = MogileFS::Device->create(devid => 1, - hostid => $hb->id, - status => "alive"); -ok($da, "made dev1"); -ok($da->not_on_hosts($ha), "dev1 not on ha"); -ok(!$da->not_on_hosts($hb), "dev1 is on hb"); - - - - diff --git a/t/http.t b/t/http.t new file mode 100644 index 00000000..92a30183 --- /dev/null +++ b/t/http.t @@ -0,0 +1,359 @@ +# this test reaches inside MogileFS::Host and MogileFS::Connection::HTTP +# internals to ensure error handling and odd corner cases are handled +# (existing tests may not exercise this in monitor) +use strict; +use warnings; +use Test::More; +use MogileFS::Server; +use MogileFS::Test; +use MogileFS::Util qw/wait_for_readability/; +use Danga::Socket; +use IO::Socket::INET; +use Socket qw(TCP_NODELAY); + +# bind a random TCP port for testing +my %lopts = ( + LocalAddr => "127.0.0.1", + LocalPort => 0, + Proto => "tcp", + ReuseAddr => 1, + Listen => 1024 +); +my $http = IO::Socket::INET->new(%lopts); +$http->sockopt(TCP_NODELAY, 1); +my $http_get = IO::Socket::INET->new(%lopts); +$http_get->sockopt(TCP_NODELAY, 1); + +my $host_args = { + hostid => 1, + hostname => 'mockhost', + hostip => $http->sockhost, + http_port => $http->sockport, + http_get_port => $http_get->sockport, +}; +my $host = MogileFS::Host->new_from_args($host_args); + +# required, defaults to 20 in normal server +MogileFS::Config->set_config("conn_pool_size", 13); + +MogileFS::Host->_init_pools; + +my $idle_pool = $MogileFS::Host::http_pool->{idle}; 
+is("MogileFS::Host", ref($host), "host created"); +MogileFS::Config->set_config("node_timeout", 1); + +is(13, $MogileFS::Host::http_pool->{total_capacity}, "conn_pool_size took effect"); + +# hit the http_get_port +{ + my $resp; + Danga::Socket->SetPostLoopCallback(sub { ! defined($resp) }); + $host->http_get("GET", "/read-only", undef, sub { $resp = $_[0] }); + + server_do(sub { + my $s = $http_get->accept; + my $buf = read_one_request($s); + if ($buf =~ m{\AGET /read-only HTTP/1\.0\r\n}) { + $s->syswrite("HTTP/1.1 200\r\nContent-Length: 0\r\n\r\n"); + } + sleep 6; # wait for SIGKILL + }, + sub { + Danga::Socket->EventLoop; + ok($resp->is_success, "HTTP response is success"); + is(200, $resp->code, "got HTTP 200 response"); + my $pool = $idle_pool->{"$host->{hostip}:$host->{http_get_port}"}; + is(1, scalar @$pool, "connection placed in GET pool"); + }); + + has_nothing_inflight(); + has_nothing_queued(); +} + +# simulate a trickled response from server +{ + my $resp; + Danga::Socket->SetPostLoopCallback(sub { ! defined($resp) }); + $host->http("GET", "/trickle", undef, sub { $resp = $_[0] }); + server_do(sub { + my $s = $http->accept; + my $buf = read_one_request($s); + my $r = "HTTP/1.1 200 OK\r\nContent-Length: 100\r\n\r\n"; + if ($buf =~ /trickle/) { + foreach my $x (split(//, $r)) { + $s->syswrite($x); + sleep 0.01; + } + foreach my $i (1..100) { + $s->syswrite($i % 10); + sleep 0.1; + } + } + sleep 6; + }, + sub { + Danga::Socket->EventLoop; + ok($resp->is_success, "HTTP response is successful"); + my $expect = ""; + foreach my $i (1..100) { + $expect .= $i % 10; + } + is($expect, $resp->content, "response matches expected"); + }); + + has_nothing_inflight(); + has_nothing_queued(); +} + +# simulate a differently trickled response from server +{ + my $resp; + Danga::Socket->SetPostLoopCallback(sub { ! 
defined($resp) }); + my $body = "*" x 100; + $host->http("GET", "/trickle-head-body", undef, sub { $resp = $_[0] }); + server_do(sub { + my $s = $http->accept; + my $buf = read_one_request($s); + my $r = "HTTP/1.1 200 OK\r\nContent-Length: 100\r\n\r\n"; + if ($buf =~ /trickle-head-body/) { + $s->syswrite($r); + sleep 1; + $s->syswrite($body); + } + sleep 6; + }, + sub { + Danga::Socket->EventLoop; + ok($resp->is_success, "HTTP response is successful on trickle"); + is($resp->content, $body, "trickled response matches expected"); + }); + + has_nothing_inflight(); + has_nothing_queued(); +} + +# simulate a server that disconnected after a (very short) idle time +# despite supporting persistent conns +{ + my $resp; + Danga::Socket->SetPostLoopCallback(sub { ! defined($resp) }); + $host->http("GET", "/foo", undef, sub { $resp = $_[0] }); + my $conn; + + server_do(sub { + my $s = $http->accept; + my $buf = read_one_request($s); + if ($buf =~ m{\AGET /foo HTTP/1\.0\r\n}) { + $s->syswrite("HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n"); + } + sleep 6; # wait for SIGKILL + }, + sub { + Danga::Socket->EventLoop; + ok($resp->is_success, "HTTP response is success"); + my $pool = $idle_pool->{"$host->{hostip}:$host->{http_port}"}; + is(1, scalar @$pool, "connection placed in pool"); + $conn = $pool->[0]; + }); + + # try again, server didn't actually keep the connection alive, + $resp = undef; + Danga::Socket->SetPostLoopCallback(sub { ! 
defined($resp) }); + $host->http("GET", "/again", undef, sub { $resp = $_[0] }); + + server_do(sub { + my $s = $http->accept; + my $buf = read_one_request($s); + if ($buf =~ m{\AGET /again HTTP/1\.0\r\n}) { + $s->syswrite("HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n"); + } + sleep 6; # wait for SIGKILL + }, + sub { + Danga::Socket->EventLoop; + my $pool = $idle_pool->{"$host->{hostip}:$host->{http_port}"}; + is(1, scalar @$pool, "new connection placed in pool"); + isnt($conn, $pool->[0], "reference not reused"); + }); + + has_nothing_inflight(); + has_nothing_queued(); +} + +# simulate persistent connection reuse +{ + my $resp; + my $nr = 6; + my $conn; + + my $failsafe = Danga::Socket->AddTimer(5, sub { $resp = "FAIL TIMEOUT" }); + Danga::Socket->SetPostLoopCallback(sub { ! defined($resp) }); + + server_do(sub { + my $s = $http->accept; + my $buf; + foreach my $i (1..$nr) { + $buf = read_one_request($s); + if ($buf =~ m{\AGET /$i HTTP/1\.0\r\n}) { + $s->syswrite("HTTP/1.1 200 OK\r\nContent-Length: 1\r\n\r\n$i"); + } + } + sleep 6; # wait for SIGKILL + }, + sub { + foreach my $i (1..$nr) { + $resp = undef; + $host->http("GET", "/$i", undef, sub { $resp = $_[0] }); + Danga::Socket->EventLoop; + is(ref($resp), "HTTP::Response", "got HTTP response"); + ok($resp->is_success, "HTTP response is successful"); + is($i, $resp->content, "response matched"); + my $pool = $idle_pool->{"$host->{hostip}:$host->{http_port}"}; + is(1, scalar @$pool, "connection placed in connection pool"); + + if ($i == 1) { + $conn = $pool->[0]; + is("MogileFS::Connection::HTTP", ref($conn), "got connection"); + } else { + ok($conn == $pool->[0], "existing connection reused (#$i)"); + } + } + }); + $failsafe->cancel; + + has_nothing_inflight(); + has_nothing_queued(); +} + +# simulate a node_timeout +sub sim_node_timeout { + my ($send_header) = @_; + my $resp; + + # we need this timer (just to exist) to break out of the event loop + my $t = Danga::Socket->AddTimer(1.2, sub { fail("timer should 
not fire") }); + + my $req = "/node-time-me-out-"; + $req .= $send_header ? 1 : 0; + Danga::Socket->SetPostLoopCallback(sub { ! defined($resp) }); + $host->http("GET", $req, undef, sub { $resp = $_[0] }); + + server_do(sub { + my $s = $http->accept; + my $buf = read_one_request($s); + if ($buf =~ /node-time-me-out/) { + if ($send_header) { + $s->syswrite("HTTP/1.1 200 OK\r\nContent-Length: 1\r\n\r\n"); + } + sleep 60; # wait to trigger timeout + } else { + # nuke the connection to _NOT_ trigger timeout + $s->syswrite("HTTP/1.1 404 Not Found\r\n\r\n"); + close($s); + } + }, + sub { + Danga::Socket->EventLoop; + $t->cancel; + ok(! $resp->is_success, "HTTP response is not successful"); + like($resp->message, qr/node_timeout/, "node_timeout hit"); + my $pool = $idle_pool->{"$host->{hostip}:$host->{http_port}"}; + is(0, scalar @$pool, "connection pool is empty"); + }); + + has_nothing_inflight(); + has_nothing_queued(); +} + +sim_node_timeout(0); +sim_node_timeout(1); + +# server just drops connection +{ + my $resp; + + # we want an empty pool to avoid retries + my $pool = $idle_pool->{"$host->{hostip}:$host->{http_port}"}; + is(0, scalar @$pool, "connection pool is empty"); + + Danga::Socket->SetPostLoopCallback(sub { ! defined($resp) }); + $host->http("GET", "/drop-me", undef, sub { $resp = $_[0] }); + + server_do(sub { + my $s = $http->accept; + my $buf = read_one_request($s); + close $s if ($buf =~ /drop-me/); + sleep 6; + }, + sub { + Danga::Socket->EventLoop; + ok(! $resp->is_success, "HTTP response is not successful"); + my $pool = $idle_pool->{"$host->{hostip}:$host->{http_port}"}; + is(0, scalar @$pool, "connection pool is empty"); + }); + + has_nothing_inflight(); + has_nothing_queued(); +} + +# server is not running +{ + my $resp; + + # we want an empty pool to avoid retries + my $pool = $idle_pool->{"$host->{hostip}:$host->{http_port}"}; + is(0, scalar @$pool, "connection pool is empty"); + + Danga::Socket->SetPostLoopCallback(sub { ! 
defined($resp) }); + $http->close; # $http is unusable after this + $host->http("GET", "/fail", undef, sub { $resp = $_[0] }); + Danga::Socket->EventLoop; + ok(! $resp->is_success, "HTTP response is not successful"); + ok($resp->header("X-MFS-Error"), "X-MFS-Error is set"); + is(0, scalar @$pool, "connection pool is empty"); + + has_nothing_inflight(); + has_nothing_queued(); +} + +done_testing(); + +sub has_nothing_inflight { + my $inflight = $MogileFS::Host::http_pool->{inflight}; + my $n = 0; + foreach my $host_port (keys %$inflight) { + $n += scalar keys %{$inflight->{$host_port}}; + } + is($MogileFS::Host::http_pool->{total_inflight}, 0, "nothing is counted to be inflight"); + is($n, 0, "nothing is really inflight"); +} + +sub has_nothing_queued { + is(scalar @{$MogileFS::Host::http_pool->{queue}}, 0, "connection pool task queue is empty"); +} + +sub server_do { + my ($child, $parent) = @_; + my $pid = fork; + fail("fork failed: $!") unless defined($pid); + + if ($pid == 0) { + $child->(); + } else { + $parent->(); + is(1, kill(9, $pid), "child killed"); + is($pid, waitpid($pid, 0), "child reaped"); + } +} + +sub read_one_request { + my $s = shift; + + my $fd = fileno($s); + wait_for_readability($fd, 5); + my $buf = ""; + do { + $s->sysread($buf, 4096, length($buf)); + } while wait_for_readability($fd, 0.1) && $buf !~ /\r\n\r\n/; + return $buf; +} diff --git a/t/mogstored-shutdown.t b/t/mogstored-shutdown.t index 6556ba5f..60892ea6 100644 --- a/t/mogstored-shutdown.t +++ b/t/mogstored-shutdown.t @@ -13,8 +13,6 @@ unless ((`netstat -nap --inet` || "") =~ m!PID/Program!) 
{ exit 0; } -plan tests => 4; - my $TEST_IP = '127.0.1.1'; my $rv; @@ -88,3 +86,5 @@ sub try { } return undef; } + +done_testing(); diff --git a/t/multiple-devices-replpol.t b/t/multiple-devices-replpol.t new file mode 100644 index 00000000..42ac924a --- /dev/null +++ b/t/multiple-devices-replpol.t @@ -0,0 +1,135 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use Test::More; +use FindBin qw($Bin); + +use MogileFS::Server; +use MogileFS::Util qw(error_code); +use MogileFS::ReplicationPolicy::MultipleDevices; +use MogileFS::Test; + +# already good. +is(rr("min=2 h1[d1=X d2=_] h2[d3=X d4=_]"), + "all_good", "all good"); + +# need to get it onto host2... +is(rr("min=2 h1[d1=X d2=_] h2[d3=_ d4=_]"), + "ideal(2,3,4)", "any device will do"); + +# still needs to be on host2, even though 2 copies on host1 +is(rr("min=2 h1[d1=X d2=X] h2[d3=_ d4=_]"), + "all_good", "2 devices is enough"); + +# anywhere will do. (can happen on, say, rebalance) +is(rr("min=2 h1[d1=_ d2=_] h2[d3=_ d4=_]"), + "ideal(1,2,3,4)", "anywhere"); + +# should desperately try d2, since host2 is down +is(rr("min=2 h1[d1=X d2=_] h2=down[d3=_ d4=_]"), + "ideal(2)"); + +# should try host3, since host2 is down +is(rr("min=2 h1[d1=X d2=_] h2=down[d3=_ d4=_] h3[d5=_ d6=_]"), + "ideal(2,5,6)"); + +# need a copy on a non-dead disk on host1 +is(rr("min=2 h1[d1=_ d2=X,dead] h2=alive[d3=X d4=_]"), + "ideal(1,4)"); + +# this is an ideal move, since we only have 2 unique hosts: +is(rr("min=3 h1[d1=_ d2=X] h2[d3=X d4=_]"), + "ideal(1,4)"); + +# ... but if we have a 3rd host, well, it could be there +is(rr("min=3 h1[d1=_ d2=X] h2[d3=X d4=_] h3[d5=_]"), + "ideal(1,4,5)"); + +# ... 
unless that host is down, in which case it's back to 1/4, +# but desperately +is(rr("min=3 h1[d1=_ d2=X] h2[d3=X d4=_] h3=down[d5=_]"), + "ideal(1,4)"); + +# too good, uniq hosts > min +is(rr("min=2 h1[d1=X d2=_] h2[d3=X d4=_] h3[d5=X]"), + "too_good"); + +# too good, but but with uniq hosts == min +is(rr("min=2 h1[d1=X d2=X] h2[d3=X d4=_]"), + "too_good"); + +# be happy with 3 copies, even though two are on same host (that's our max unique hosts) +is(rr("min=3 h1[d1=_ d2=X] h2[d3=X d4=X]"), + "all_good"); + +# be happy with one drain copy +is(rr("min=2 h1[d3=X,drain d5=_] h2[d4=X d6=_]"), + "all_good", + "we are happy with one copy in a drain device"); + +# drain copy counts +is(rr("min=2 h1[d3=X,drain d5=X] h2[d4=X d6=_]"), + "too_good", + "the extra copy in drain leaves us too satisfied"); + +sub rr { + my ($state) = @_; + my $ostate = $state; # original + + MogileFS::Factory::Host->t_wipe; + MogileFS::Factory::Device->t_wipe; + MogileFS::Config->set_config_no_broadcast("min_free_space", 100); + my $hfac = MogileFS::Factory::Host->get_factory; + my $dfac = MogileFS::Factory::Device->get_factory; + + my $min = 2; + if ($state =~ s/^\bmin=(\d+)\b//) { + $min = $1; + } + + my $hosts = {}; + my $devs = {}; + my $on_devs = []; + + my $parse_error = sub { + die "Can't parse:\n $ostate\n" + }; + while ($state =~ s/\bh(\d+)(?:=(.+?))?\[(.+?)\]//) { + my ($n, $opts, $devstr) = ($1, $2, $3); + $opts ||= ""; + die "dup host $n" if $hosts->{$n}; + + my $h = $hosts->{$n} = $hfac->set({ hostid => $n, + status => ($opts || "alive"), observed_state => "reachable", + hostname => $n }); + + foreach my $ddecl (split(/\s+/, $devstr)) { + $ddecl =~ /^d(\d+)=([_X])(?:,(\w+))?$/ + or $parse_error->(); + my ($dn, $on_not, $status) = ($1, $2, $3); + die "dup device $dn" if $devs->{$dn}; + my $d = $devs->{$dn} = $dfac->set({ devid => $dn, + hostid => $h->id, observed_state => "writeable", + status => ($status || "alive"), mb_total => 1000, + mb_used => 100, }); + if ($on_not eq "X" && 
$d->dstate->should_have_files) { + push @$on_devs, $d; + } + } + } + $parse_error->() if $state =~ /\S/; + + my $polclass = "MogileFS::ReplicationPolicy::MultipleDevices"; + my $pol = $polclass->new; + my $rr = $pol->replicate_to( + fid => 1, + on_devs => $on_devs, + all_devs => $devs, + failed => {}, + min => $min, + ); + return $rr->t_as_string; +} + +done_testing(); diff --git a/t/multiple-hosts-replpol.t b/t/multiple-hosts-replpol.t index 4aa870e6..1d5545ca 100644 --- a/t/multiple-hosts-replpol.t +++ b/t/multiple-hosts-replpol.t @@ -10,8 +10,6 @@ use MogileFS::Util qw(error_code); use MogileFS::ReplicationPolicy::MultipleHosts; use MogileFS::Test; -plan tests => 13; - # already good. is(rr("min=2 h1[d1=X d2=_] h2[d3=X d4=_]"), "all_good", "all good"); @@ -65,13 +63,25 @@ is(rr("min=2 h1[d1=X d2=X] h2[d3=X d4=_]"), is(rr("min=3 h1[d1=_ d2=X] h2[d3=X d4=X]"), "all_good"); +# be happy with one drain copy +is(rr("min=2 h1[d3=X,drain d5=_] h2[d4=X d6=_]"), + "all_good", + "we are happy with one copy in a drain device"); + +# drain copy counts +is(rr("min=2 h1[d3=X,drain d5=X] h2[d4=X d6=_]"), + "too_good", + "the extra copy in drain leaves us too satisfied"); + sub rr { my ($state) = @_; my $ostate = $state; # original - MogileFS::Host->t_wipe_singletons; - MogileFS::Device->t_wipe_singletons; + MogileFS::Factory::Host->t_wipe; + MogileFS::Factory::Device->t_wipe; MogileFS::Config->set_config_no_broadcast("min_free_space", 100); + my $hfac = MogileFS::Factory::Host->get_factory; + my $dfac = MogileFS::Factory::Device->get_factory; my $min = 2; if ($state =~ s/^\bmin=(\d+)\b//) { @@ -90,17 +100,19 @@ sub rr { $opts ||= ""; die "dup host $n" if $hosts->{$n}; - my $h = $hosts->{$n} = MogileFS::Host->of_hostid($n); - $h->t_init($opts || "alive"); + my $h = $hosts->{$n} = $hfac->set({ hostid => $n, + status => ($opts || "alive"), observed_state => "reachable", + hostname => $n }); foreach my $ddecl (split(/\s+/, $devstr)) { $ddecl =~ /^d(\d+)=([_X])(?:,(\w+))?$/ or 
$parse_error->(); my ($dn, $on_not, $status) = ($1, $2, $3); die "dup device $dn" if $devs->{$dn}; - my $d = $devs->{$dn} = MogileFS::Device->of_devid($dn); - $status ||= "alive"; - $d->t_init($h->id, $status); + my $d = $devs->{$dn} = $dfac->set({ devid => $dn, + hostid => $h->id, observed_state => "writeable", + status => ($status || "alive"), mb_total => 1000, + mb_used => 100, }); if ($on_not eq "X" && $d->dstate->should_have_files) { push @$on_devs, $d; } @@ -120,3 +132,4 @@ sub rr { return $rr->t_as_string; } +done_testing(); diff --git a/t/replpolicy-parsing.t b/t/replpolicy-parsing.t index 782d918a..6298fdb4 100644 --- a/t/replpolicy-parsing.t +++ b/t/replpolicy-parsing.t @@ -10,8 +10,6 @@ use MogileFS::Server; use MogileFS::Util qw(error_code); use MogileFS::Test; -plan tests => 31; - my $obj; $obj = MogileFS::ReplicationPolicy->new_from_policy_string("MultipleHosts(5)"); @@ -39,4 +37,4 @@ foreach my $str ("Union(MultipleHosts(5), MultipleHosts(2))", isa_ok($obj->{policies}[1], "MogileFS::ReplicationPolicy::MultipleHosts"); } - +done_testing(); diff --git a/t/store-sqlite.t b/t/store-sqlite.t new file mode 100644 index 00000000..8468d6ac --- /dev/null +++ b/t/store-sqlite.t @@ -0,0 +1,110 @@ +# -*-perl-*- +# tests for SQlite-specific features +use strict; +use warnings; +use Test::More; +use FindBin qw($Bin); + +use MogileFS::Server; +use MogileFS::Util qw(error_code); +use MogileFS::Test; +use File::Temp (); +use POSIX qw(:sys_wait_h); + +my ($fh, $filename) = File::Temp::tempfile(); +close($fh); +MogileFS::Config->set_config('db_dsn', "DBI:SQLite:$filename"); +MogileFS::Config->set_config('db_user', ''); +MogileFS::Config->set_config('db_pass', ''); +MogileFS::Config->set_config('max_handles', 0xffffffff); + +my ($r, $w, $pid, $buf); +my $sto = eval { MogileFS::Store->new }; +if ($sto) { + plan tests => 28; +} else { + plan skip_all => "Can't create temporary test database: $@"; + exit 0; +} + +Mgd::set_store($sto); +is(ref($sto), 
"MogileFS::Store::SQLite", "store is sane"); +is($sto->setup_database, 1, "setup database"); + +is(1, pipe($r, $w), "IPC pipe is ready"); + +# normal lock contention +$pid = fork; +fail("fork failed: $!") unless defined $pid; +if ($pid == 0) { + $sto = Mgd::get_store(); # fork-safe + $SIG{TERM} = sub { + $sto->release_lock("test-lock") == 1 or die "released bad lock"; + exit 0; + }; + $sto->get_lock("test-lock", 1) == 1 or die "child failed to get_lock"; + close($r); + syswrite($w, ".") == 1 or die "child failed to wake parent"; + sleep 60; + exit 0; +} +if ($pid > 0) { + is(sysread($r, $buf, 1), 1, "child wakes us up"); + is($buf, ".", "child wakes parent up properly"); + ok(! $sto->get_lock("test-lock", 1), "fails to lock while child has lock"); + is(kill(TERM => $pid), 1, "kill successful"); + is(waitpid($pid, 0), $pid, "waitpid successful"); + is($?, 0, "child dies correctly"); + is($sto->get_lock("test-lock", 1), 1, "acquire lock when child dies"); +} + +# detects recursive lock +ok(! 
eval { $sto->get_lock("test-lock", 1); }, "recursion fails"); +like($@, qr/Lock recursion detected/i, "proper error on failed lock"); +is($sto->release_lock("test-lock"), 1, "lock release"); + +is($sto->get_lock("test-lock", 0), 1, "acquire lock with 0 timeout"); +is($sto->release_lock("test-lock"), 1, "lock release"); +is($sto->release_lock("test-lock") + 0, 0, "redundant lock release"); + +# waits for lock +$pid = fork; +fail("fork failed: $!") unless defined $pid; +if ($pid == 0) { + $sto = Mgd::get_store(); # fork-safe + $sto->get_lock("test-lock", 1) or die "child failed to get_lock"; + close($r); + syswrite($w, ".") == 1 or die "child failed to wake parent"; + sleep 2; + $sto->release_lock("test-lock") == 1 or die "child failed to release"; + exit 0; +} +if ($pid > 0) { + is(sysread($r, $buf, 1), 1, "parent woken up"); + is($buf, ".", "child wakes parent up properly"); + ok($sto->get_lock("test-lock", 6), "acquire lock eventually"); + is(waitpid($pid, 0), $pid, "waitpid successful"); + is($?, 0, "child dies correctly"); + is($sto->release_lock("test-lock"), 1, "lock release"); +} + + +# kill -9 a lock holder +$pid = fork; +fail("fork failed: $!") unless defined $pid; +if ($pid == 0) { + $sto = Mgd::get_store(); # fork-safe + $sto->get_lock("test-lock", 1) or die "child failed to get_lock"; + close($r); + syswrite($w, ".") == 1 or die "child failed to wake parent"; + sleep 60; + exit 0; +} +if ($pid > 0) { + is(sysread($r, $buf, 1), 1, "parent woken up"); + is($buf, ".", "child wakes parent up properly"); + is(kill(KILL => $pid), 1, "kill -9 successful"); + is(waitpid($pid, 0), $pid, "waitpid successful"); + ok(WIFSIGNALED($?) && WTERMSIG($?) 
== 9, "child was SIGKILL-ed"); + ok($sto->get_lock("test-lock", 1), "acquire lock in parent"); +} diff --git a/t/store.t b/t/store.t index c4affe6a..cf9ae358 100644 --- a/t/store.t +++ b/t/store.t @@ -10,17 +10,15 @@ use MogileFS::Util qw(error_code); use MogileFS::Test; my $sto = eval { temp_store(); }; -if ($sto) { - plan tests => 12; -} else { +if (!$sto) { plan skip_all => "Can't create temporary test database: $@"; exit 0; } -my $dom = MogileFS::Domain->create("foo"); -ok($dom, "created a domain"); -my $cls = $dom->create_class("classA"); -ok($cls, "created a class"); +my $dmid = $sto->create_domain("foo"); +ok($dmid, "created a domain"); +my $clsid = $sto->create_class($dmid, "classA"); +ok($clsid, "created a class"); my $df = MogileFS::DevFID->new(100, 200); ok($df, "made devfid"); @@ -42,9 +40,9 @@ is(scalar @on, 2, "FID 101 on 2 devices"); { my $fidid = $sto->register_tempfile( fid => undef, - dmid => $dom->id, + dmid => $dmid, key => "my_tempfile", - classid => $cls->classid, + classid => $clsid, devids => join(',', 1,2,3), ); ok($fidid, "got a fidid"); @@ -52,9 +50,9 @@ is(scalar @on, 2, "FID 101 on 2 devices"); my $fidid2 = eval { $sto->register_tempfile( fid => $fidid, - dmid => $dom->id, + dmid => $dmid, key => "my_tempfile", - classid => $cls->classid, + classid => $clsid, devids => join(',', 1,2,3), ); }; @@ -63,3 +61,178 @@ is(scalar @on, 2, "FID 101 on 2 devices"); is($errc, "dup", "got a dup into tempfile") or die "Got error: $@\n"; } + +my $ignore_replace_match = { + base => { pattern => undef, dies => 1 }, + MySQL => { pattern => qr/INSERT IGNORE/, dies => 0 }, + SQLite => { pattern => qr/REPLACE/, dies => 0 }, + Postgres => { pattern => undef, dies => 1 }, +}; + +my $prx = eval { $sto->ignore_replace } || ''; +my $sto_driver = ( split( /::/, ref($sto) ) )[2] || 'base'; +my $match_spec = $ignore_replace_match->{ $sto_driver } + or die "Test not configured for '$sto_driver' storage driver"; + + +ok( + ref( $match_spec->{pattern} ) eq 'Regexp'? 
+ ( $prx =~ $match_spec->{pattern} ) : + ( !$prx ), + sprintf( + "ignore_replace %s return value for storage type '%s'", + ref( $match_spec->{pattern} ) eq 'Regexp'? + 'should' : 'should not', + $sto_driver + ) +) or diag "Got value: $prx"; + +ok( + $match_spec->{dies}? $@ : !$@, + sprintf( + "ignore_replace %s die for storage type '%s'", + $match_spec->{dies}? 'should' : 'should not', + $sto_driver + ) +) or diag "Got exception: $@"; + +my $rv; + +# test retry_on_deadlock using good sql +$rv = eval { + $sto->retry_on_deadlock( sub { $sto->dbh->do("SELECT 1;"); } ); +}; +ok ( + $rv eq '1' || $rv eq '0E0', + "retry_on_deadlock return value for '$sto_driver': $rv" +) or diag "Got return value: $rv"; + +# test retry_on_deadlock using bad sql +$rv = eval { + $sto->retry_on_deadlock( sub { $sto->dbh->do("BADSQL;"); } ); +}; +ok ( + $@ =~ /BADSQL/, + "retry_on_deadlock got an exception on bad sql '$sto_driver'" +) or diag "Got exception value: $@"; + +# test retry_on_deadlock using a custom exception +$rv = eval { + $sto->retry_on_deadlock( sub { die "preempt"; } ); +}; +ok ( + $@ =~ /preempt/, + "retry_on_deadlock got a non-sql exception for '$sto_driver'" +) or diag $@; + +sub _do_induce_deadlock { + my @args = @_; + return eval { + no strict 'refs'; + no warnings 'redefine'; + my $c = 0; + local *{ "MogileFS\::Store\::$sto_driver\::was_deadlock_error" } = sub { + return $c++ < 2; # unlock on third try + }; + $sto->retry_on_deadlock( @args ); + }; +} + +# attempt to induce a deadlock and check iterations +my $_v = 0; +$rv = _do_induce_deadlock( sub { return $_v++; } ); + +ok( + !$@, + "no exception on retry_on_deadlock while inducing a deadlock" +) or diag $@; + +ok( + $rv == 2, + 'retry_on_deadlock returned good iteration count while inducing a deadlock' +) or diag $rv; + +# induce a deadlock using badsql... 
should return an exception +$rv = _do_induce_deadlock( sub { $sto->dbh->do("BADSQL;"); } ); +ok ( +    !$rv && $@ =~ /BADSQL/, +    "retry_on_deadlock got expected exception inducing a deadlock with bad sql" +) or diag "Got value '$rv' with exception: $@"; + +# induce a deadlock with good sql; check sql return and iterations +$_v = 0; +$rv = _do_induce_deadlock( +    sub { +        return [ $sto->dbh->do("SELECT 1;"), $_v++ ]; +    } +); +ok ( +    ( !$@ && ref($rv) eq 'ARRAY' ) && ( +        ( $rv->[0] eq '1' || $rv->[0] eq '0E0' ) && +        $rv->[1] == 2 +    ), +    "retry_on_deadlock got proper return value and iteration while inducing a deadlock" +); + +use Digest::MD5 qw(md5); + +$sto->set_checksum(6, 1, md5("FOO")); +my $hash = $sto->get_checksum(6); +ok($hash->{checksum} eq md5("FOO"), "checksum matches expected"); +ok($hash->{fid} == 6, "checksum fid set correctly"); +ok($hash->{hashtype} == 1, "hashtype set correctly"); + +$sto->set_checksum(6, 2, md5("MOO")); +$hash = $sto->get_checksum(6); +ok($hash->{checksum} eq md5("MOO"), "checksum matches expected"); +ok($hash->{fid} == 6, "checksum fid set correctly"); +ok($hash->{hashtype} == 2, "hashtype set correctly"); + +ok(1 == $sto->delete_checksum(6), "checksum deleted OK"); +ok(0 == $sto->delete_checksum(6), "checksum delete MISS"); +ok(!defined $sto->get_checksum(6), "undef on missing checksum"); + +# case-sensitivity tests for list_keys +my %arg = ( +    fidid => 1234, +    dmid => $dmid, +    key => 'Case_Sensitive_Clod', +    length => 1, +    classid => $clsid, +    devcount => 1 +); +$sto->replace_into_file(%arg); +my $rows; + +# ensure existing (broken) case-insensitive list_keys works for MySQL/SQLite +# LIKE is always case-sensitive in Postgres, so its behavior for list_keys +# was never broken. 
+$rows = $sto->get_keys_like($dmid, "case", undef, 1000); +if (ref($sto) eq "MogileFS::Store::Postgres") { + ok(scalar @$rows == 0, "Postgres list_keys is case-sensitive"); +} else { + ok($rows->[0] eq 'Case_Sensitive_Clod', "list_keys matches insensitively"); +} + +# make list_keys case-sensitive +MogileFS::Config->set_server_setting("case_sensitive_list_keys", 1); +MogileFS::Config->cache_server_setting("case_sensitive_list_keys", 1); + +$rows = $sto->get_keys_like($dmid, "case", undef, 1000); +ok(scalar @$rows == 0, "case-incorrect list_keys fails to match"); +$rows = $sto->get_keys_like($dmid, "Case", undef, 1000); +ok($rows->[0] eq 'Case_Sensitive_Clod', "case-correct list_keys matches"); +ok(scalar @$rows == 1, "only one row matched"); + +# make list_keys case-insensitive again +MogileFS::Config->set_server_setting("case_sensitive_list_keys", 0); +MogileFS::Config->cache_server_setting("case_sensitive_list_keys", 0); + +$rows = $sto->get_keys_like($dmid, "case", undef, 1000); +if (ref($sto) eq "MogileFS::Store::Postgres") { + ok(scalar @$rows == 0, "Postgres list_keys is case-sensitive"); +} else { + ok($rows->[0] eq 'Case_Sensitive_Clod', "list_keys matches insensitively (again)"); +} + +done_testing(); diff --git a/t/util.t b/t/util.t index 76cf0df5..2d80a045 100644 --- a/t/util.t +++ b/t/util.t @@ -5,8 +5,6 @@ use warnings; use Test::More; use MogileFS::Util qw(weighted_list); -plan tests => 1; - my %first; for (1..100) { my @l = weighted_list(["A", 0.1], ["B", 0.3]); @@ -16,4 +14,4 @@ for (1..100) { # conservative when playing with randomness ok($first{"B"} >= ($first{"A"} * 1.8), "weightest list"); - +done_testing();