From 338e54419d709c449469ee62620dbf17ed2e0f71 Mon Sep 17 00:00:00 2001 From: Kennedy Mwenja Date: Thu, 25 Oct 2018 06:13:32 +0300 Subject: [PATCH] Glock v0.1.0 First working version. --- .gitignore | 2 + LICENSE | 21 ++++++ Makefile | 16 +++++ README.md | 76 +++++++++++++++++++- go.mod | 3 + go.sum | 2 + main.go | 207 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 325 insertions(+), 2 deletions(-) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 go.mod create mode 100644 go.sum create mode 100644 main.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a5d8f72 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +bin/ +dist/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..9bd6cab --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 Kennedy Mwenja + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..5262747 --- /dev/null +++ b/Makefile @@ -0,0 +1,16 @@ +VERSION := $(shell sh -c 'git describe --always --tags') +LDFLAGS := -ldflags "-X main.VERSION=$(VERSION)" + +all: build + +build: + mkdir -p bin + go build -o bin/glock $(LDFLAGS) . + +dist: build + rm -rf dist/* + mkdir -p dist/glock + cp bin/glock dist/glock/ + tar -C dist -czvf dist/glock-$(VERSION).tar.gz glock + +.PHONY= all build diff --git a/README.md b/README.md index e643a78..42ef4b3 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,77 @@ -GLock -===== +# GLock + +*Project Status: ALPHA* Runs a command if an associated lockfile is not acquired by another command. Spiritual successor to `flock`. + +Flock is a unix utility that allows you to specify a lockfile before running a command +so that only one instance of that command runs at a time. + +A typical invocation of flock: +```sh +# this acquires the lockfile and runs the script +flock -xn /tmp/lockfile long_running_script.sh + +# this fails immediately because another script has acquired the lockfile +flock -xn /tmp/lockfile long_running_script.sh +``` + +This makes it very convenient for controlling cron scripts that may run longer than their schedule. +For instance, a cron script may be scheduled to run every 30 mins but its run time may end up +being 40 mins, longer than that 30 mins. This may be undesirable for scripts that require exclusive +access to some resource or scripts that, when run in parallel, overutilize resources. + +That being said, it is considered that engineering exclusive locks in the script itself +would be a better and more maintainable solution. However, there can be situations +that justify the use of `flock` and `glock` hopes to extend and improve the solutions. +Specifically, flock does not support the following use cases: + +1. Specifying a timeout for a script. A script may fail in such a way that it does not exit e.g. deadlocks. 
+ Flock doesn't allow you to specify that if the script doesn't exit in a specified amount of time, it is killed instead. + You could potentially do the same with the `timeout` utility i.e. `timeout 5 flock ....` but this + doesn't take the lockfile into consideration. For example in this case, once the script is killed, + the lockfile needs to be released (deleted). Glock attempts to support this use case. + +2. Determining if a script owning a lockfile is dead. It is possible for flock to exit without + releasing the lockfile. This could possibly be due to a *hard* exit e.g. signal-kill or OOM. + In this scenario, because the lockfile was not removed, the next script will fail to start. + Glock attempts to solve this by writing the pid of the process owning the lockfile *into* the + lockfile. This allows the next invocation to query whether that pid is alive and if it's not, + remove the *stale* lockfile and attempt to re-acquire a new lockfile. + +Glock, however, does not currently support: + +1. Shared locks also known as multiple readers, single writer locks. +2. Introspection tools to query the state of a running instance of glock (lockfile, its process). + +## Installing + +**Prebuilt binaries**: + +1. Download a tarball from [Releases](https://github.com/kmwenja/glock/releases). +2. Extract the tarball: `tar -xvf glock-vX.Y.Z.tar.gz`. This will extract a directory called `glock`. +3. Copy the binary at `glock/glock` to a suitable path or run `glock/glock` directly. 
/*
Non-Go patch content that shared these source lines with main.go (end of the
README.md hunk, the go.mod and go.sum hunks, and the main.go diff header).
It is kept verbatim, one patch line per line, so nothing is lost:

+
+**From Source**:
+`go get -u -v github.com/kmwenja/glock`
+
+## Usage
+
+```sh
+# help
+glock
+
+# run with defaults
+glock echo hello world
+
+# change the lockfile
+glock -lockfile /tmp/mylockfile
+
+# run with a specific timeout (10mins)
+glock -timeout 600 echo hello world
+
+# if another process has the lockfile,
+# wait for them to be done for some time (20s) before quitting
+glock -wait 20 echo hello world
+```
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..5373ead
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module github.com/kmwenja/glock
+
+require github.com/pkg/errors v0.8.0
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..3dfe462
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,2 @@
+github.com/pkg/errors v0.8.0 h1:WdK/asTD0HN+q6hsWO3/vpuAkAr+tw6aNJNDFFf0+qw=
+github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..6d9c278
--- /dev/null
+++ b/main.go
@@ -0,0 +1,207 @@
*/

// Command glock runs a command only while it holds a pid-stamped lockfile,
// optionally waiting for the lockfile to become free and optionally killing
// the command when it runs longer than a timeout.
package main

// Error wrapping uses the standard library (fmt's %w verb and errors.Is), so
// this file no longer needs github.com/pkg/errors.
import (
	"errors"
	"flag"
	"fmt"
	"os"
	"os/exec"
	"strings"
	"syscall"
	"time"
)

// VERSION is the release identifier printed by -version. The Makefile injects
// it at build time with `-ldflags "-X main.VERSION=..."`, so the identifier
// must keep this exact name.
var VERSION = ""

// retryInterval is the pause between two attempts to take the lockfile.
const retryInterval = 1 * time.Second

// errLockInUse is returned by checkExisting when the pid recorded in the
// lockfile belongs to a process that still exists.
var errLockInUse = errors.New("lockfile in use by another process")

// main parses the command-line flags, prints the version or usage when asked
// to, and otherwise runs the given command under the lockfile. The process
// exits with status 1 when the lock could not be taken or the command failed.
func main() {
	var (
		timeout  = flag.Int("timeout", 60, "number of seconds to wait for the command to terminate, otherwise force terminate. Use -1 to indicate 'wait forever'")
		lockfile = flag.String("lockfile", "/tmp/glockfile", "file to acquire to ensure the command can be run. If file exists, quit.")
		wait     = flag.Int("wait", 10, "number of seconds to wait to acquire the lockfile, otherwise quit. Use -1 to indicate 'wait and retry every 1s forever'")
		version  = flag.Bool("version", false, "print version")
	)
	flag.Parse()

	if *version {
		fmt.Printf("Version: %s\n", VERSION)
		os.Exit(0)
	}

	args := flag.Args()
	if len(args) < 1 {
		fmt.Printf("Usage: glock [options] command arg1 arg2 arg3 ....\n\n")
		fmt.Printf("Options:\n")
		flag.PrintDefaults()
		os.Exit(1)
	}

	// glock has returned (and therefore released the lockfile through its
	// defer) before os.Exit is reached, so exiting here cannot skip cleanup.
	if !glock(*lockfile, *wait, *timeout, args) {
		os.Exit(1)
	}
}

// glock acquires lockfile, runs command while holding it and releases the
// lockfile afterwards.
//
// wait is the number of seconds to keep retrying the acquisition; a negative
// value retries forever. timeout is the number of seconds the command may run
// before it is killed; a negative value lets it run forever. command holds the
// program followed by its arguments.
//
// It reports whether the lock was obtained and the command exited
// successfully. Failures are logged to stderr rather than returned.
func glock(lockfile string, wait int, timeout int, command []string) bool {
	// Reject an empty command before touching the lockfile; indexing
	// command[0] below would otherwise panic while the lock is held.
	if len(command) == 0 {
		logErr(errors.New("no command given"))
		return false
	}

	log("obtaining lockfile: %s", lockfile)
	if !acquireLock(lockfile, wait) {
		return false
	}
	defer releaseLock(lockfile)
	log("obtained lockfile: %s", lockfile)

	return runCommand(command, timeout)
}

// acquireLock tries to create lockfile once per retryInterval until it
// succeeds or wait seconds have elapsed (a negative wait never gives up).
// It reports whether the lockfile is now owned by this process.
func acquireLock(lockfile string, wait int) bool {
	start := time.Now()
	for {
		err := lockFile(lockfile)
		if err == nil {
			return true
		}
		logErr(fmt.Errorf("lock file error: %w", err))

		if wait > -1 && time.Since(start) >= time.Duration(wait)*time.Second {
			// we waited long enough, quitting
			logErr(fmt.Errorf("could not obtain lockfile after waiting %ds", wait))
			return false
		}

		logErr(fmt.Errorf("waiting %s to try again", retryInterval))
		time.Sleep(retryInterval)
	}
}

// releaseLock removes lockfile and logs the outcome. The "released" message is
// only printed when the removal actually succeeded.
func releaseLock(lockfile string) {
	if err := os.Remove(lockfile); err != nil {
		logErr(fmt.Errorf("could not remove lockfile: %w", err))
		return
	}
	log("released lockfile: %s", lockfile)
}

// runCommand starts command with this process's stdin, stdout and stderr and
// waits for it to finish. When timeout is non-negative and the command is
// still running after that many seconds, it is killed. It reports whether the
// command ran to completion and exited without an error.
func runCommand(command []string, timeout int) bool {
	log("running command (timeout: %ds): %s", timeout, strings.Join(command, " "))

	c := exec.Command(command[0], command[1:]...)
	c.Stdin = os.Stdin
	c.Stdout = os.Stdout
	c.Stderr = os.Stderr

	if err := c.Start(); err != nil {
		logErr(fmt.Errorf("could not start command: %w", err))
		return false
	}

	// Buffered so the waiting goroutine can always deliver its result and
	// exit, even on a path that never reads from the channel.
	done := make(chan error, 1)
	go func() {
		done <- c.Wait()
	}()

	// A nil channel never becomes ready, which is exactly "wait forever".
	var expired <-chan time.Time
	if timeout >= 0 {
		timer := time.NewTimer(time.Duration(timeout) * time.Second)
		defer timer.Stop()
		expired = timer.C
	}

	select {
	case err := <-done:
		if err != nil {
			logErr(fmt.Errorf("command exited with an error: %w", err))
			return false
		}
		log("successfully ran command")
		return true
	case <-expired:
		if err := c.Process.Kill(); err != nil {
			logErr(fmt.Errorf("could not kill command: %w", err))
			return false
		}
		// Reap the child before returning so the lockfile is not released
		// while the killed command is still on its way out.
		<-done
		logErr(fmt.Errorf("command took longer than timeout and was killed"))
		return false
	}
}

// lockFile takes the lock by exclusively creating filename and writing this
// process's pid into it. A stale lockfile left behind by a dead owner is
// cleared first (see checkExisting). It returns an error when the lock is
// held by a live process or the lockfile cannot be created or written.
func lockFile(filename string) error {
	if err := checkExisting(filename); err != nil {
		return fmt.Errorf("check existing error: %w", err)
	}

	// O_EXCL makes creation fail if another process created the file between
	// the check above and this call.
	f, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0600)
	if err != nil {
		return fmt.Errorf("could not create lockfile: %w", err)
	}

	// write pid into lockfile so that the owner can be traced
	_, writeErr := fmt.Fprintf(f, "%d\n", os.Getpid())
	if closeErr := f.Close(); writeErr == nil {
		writeErr = closeErr
	}
	if writeErr != nil {
		// A lockfile without a readable pid can never be recognised as stale
		// and would block every later run, so do not leave it behind.
		if rmErr := os.Remove(filename); rmErr != nil {
			return fmt.Errorf("could not write pid to lockfile: %w (removing it also failed: %v)", writeErr, rmErr)
		}
		return fmt.Errorf("could not write pid to lockfile: %w", writeErr)
	}

	return nil
}

// checkExisting inspects an existing lockfile. It returns nil when filename
// does not exist, or when it names an owner that no longer exists (the stale
// file is then removed). It returns errLockInUse when the recorded owner is
// still alive, and another error when the lockfile cannot be read, holds an
// invalid pid, or cannot be removed.
//
// NOTE(review): check-then-remove is not atomic. Two waiters can both see the
// same stale lockfile, and the slower one may remove a lockfile the faster one
// has just re-created. Closing that window needs a kernel lock (flock/fcntl).
func checkExisting(filename string) error {
	pid, exists, err := readOwnerPID(filename)
	if err != nil {
		return err
	}
	if !exists {
		// lockfile does not exist, this is fine
		return nil
	}
	if pid <= 0 {
		// Zero and negative pids do not name a single process.
		return fmt.Errorf("existing lockfile holds an invalid pid: %d", pid)
	}

	alive, err := processAlive(pid)
	if err != nil {
		return err
	}
	if alive {
		return errLockInUse
	}

	// owner already finished so remove the stale lockfile (the file handle
	// used for reading was closed by readOwnerPID)
	if err := os.Remove(filename); err != nil {
		if errors.Is(err, os.ErrNotExist) {
			// someone else cleaned it up first; the goal is met either way
			return nil
		}
		return fmt.Errorf("could not remove existing lockfile: %w", err)
	}
	return nil
}

// readOwnerPID reads the pid stored in the lockfile filename. The boolean
// result is false when the file does not exist, which is not an error.
func readOwnerPID(filename string) (int, bool, error) {
	f, err := os.Open(filename)
	if err != nil {
		if errors.Is(err, os.ErrNotExist) {
			return 0, false, nil
		}
		return 0, false, fmt.Errorf("could not open file: %w", err)
	}
	defer f.Close()

	var pid int
	if _, err := fmt.Fscanf(f, "%d\n", &pid); err != nil {
		// TODO potentially remove invalid lockfiles
		return 0, true, fmt.Errorf("could not read from existing lockfile: %w", err)
	}
	return pid, true, nil
}

// processAlive reports whether a process with the given pid exists, probing it
// with signal 0, which performs the existence and permission checks without
// delivering a signal.
func processAlive(pid int) (bool, error) {
	process, err := os.FindProcess(pid)
	if err != nil {
		return false, fmt.Errorf("failed while finding process %d: %w", pid, err)
	}
	// Release only frees resources tied to the handle; its error carries no
	// information about the probed process, so it is deliberately ignored.
	defer process.Release()

	err = process.Signal(syscall.Signal(0))
	switch {
	case err == nil:
		return true, nil
	case errors.Is(err, os.ErrProcessDone), errors.Is(err, syscall.ESRCH):
		return false, nil
	case errors.Is(err, syscall.EPERM):
		// Not allowed to signal it, which still proves the process exists.
		return true, nil
	default:
		return false, fmt.Errorf("failed while finding process: %d: %w", pid, err)
	}
}

// log formats s with args and writes it to stdout, prefixed with "glock: ".
func log(s string, args ...interface{}) {
	newS := fmt.Sprintf(s, args...)
	fmt.Fprintf(os.Stdout, "glock: %s\n", newS)
}

// logErr writes e to stderr, prefixed with "glock: ".
func logErr(e error) {
	fmt.Fprintf(os.Stderr, "glock: %v\n", e)
}