Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RHELC-1124] Refactor applock to have a more robust API. #979

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 80 additions & 41 deletions convert2rhel/applock.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
__metaclass__ = type

import errno
import fcntl
import logging
import os
import stat
import tempfile


Expand Down Expand Up @@ -51,6 +53,11 @@ class ApplicationLock:
process is not running, it removes the file and tries to lock it
again.

File locking: to avoid a race condition in which another process
overwrites the PID file between the time we check for the process's
existence and we unlink the stale lockfile, we lock the file using
BSD file locking.

For safety, unexpected conditions, like garbage in the file or
bad permissions, are treated as if the application is locked,
because something is obviously wrong.
Expand All @@ -59,24 +66,24 @@ class ApplicationLock:
def __init__(self, name):
# Our application name
self._name = name
# Do we think we locked the pid file?
self._locked = False
# Our process ID
# Our process ID. We save this when the lock is created so it will be
# consistent even if we check from inside a fork.
self._pid = os.getpid()
# Path to the file that contains the process id
self._pidfile = os.path.join(_DEFAULT_LOCK_DIR, self._name + ".pid")

def __str__(self):
if self._locked:
if self.is_locked:
status = "locked"
else:
status = "unlocked"
return "%s PID %d %s" % (self._pidfile, self._pid, status)

def _try_create(self):
"""Try to create the lock file. If this succeeds, the lock file
exists and we created it. If an exception other than the one
we expect is raised, re-raises it.
"""Try to create the lock file. If this succeeds, the lock
file exists and we created it, so we hold the lock. If an
exception other than the one we expect is raised, re-raises
it.

:returns: True if we created the lock, False if we didn't.
"""
Expand All @@ -89,6 +96,10 @@ def _try_create(self):
# Note that NamedTemporaryFile will clean up the file it created,
# but the lockfile we created by doing the link will stay around.
with tempfile.NamedTemporaryFile(mode="w", suffix=".pid", prefix=self._name, dir=_DEFAULT_LOCK_DIR) as f:
# Elsewhere in the code, we use flock() on this file;
# using that call on a group- or world-readable file poses an
# extreme security risk.
os.chmod(f.name, stat.S_IRUSR | stat.S_IWUSR)
jochapma marked this conversation as resolved.
Show resolved Hide resolved
f.write(str(self._pid) + "\n")
f.flush()
try:
Expand All @@ -97,14 +108,28 @@ def _try_create(self):
except OSError as exc:
if exc.errno == errno.EEXIST:
return False
raise exc
Venefilyn marked this conversation as resolved.
Show resolved Hide resolved
raise
loggerinst.debug("%s." % self)
return True

r0x0d marked this conversation as resolved.
Show resolved Hide resolved
@property
def is_locked(self):
"""Test whether this object is locked."""
return self._locked
"""Test whether this object was locked by this instance of
the application."""
try:
with open(self._pidfile, "r") as filep:
fcntl.flock(filep, fcntl.LOCK_EX)
jochapma marked this conversation as resolved.
Show resolved Hide resolved
try:
file_contents = filep.read()
pid = int(file_contents.rstrip())
if pid:
return pid == self._pid
finally:
fcntl.flock(filep, fcntl.LOCK_UN)
jochapma marked this conversation as resolved.
Show resolved Hide resolved
except (IOError, OSError) as exc:
if exc.errno == errno.ENOENT:
pass
return False

@staticmethod
def _pid_exists(pid):
Expand All @@ -115,57 +140,71 @@ def _pid_exists(pid):
os.kill(pid, 0)
except OSError as exc:
# The only other (theoretical) possibility is EPERM, which
# would mean the process exists.
# would mean the process exists and therefore we should return
# True.
if exc.errno == errno.ESRCH:
return False
return True

def try_to_lock(self, _recursive=False):
"""Try to get a lock on this application. If successful,
the application will be locked; the lock should be released
with unlock().
def _safe_unlink(self):
"""Unlink the lock file. If the unlink fails because the file
doesn't exist, swallow the exception; this avoids spurious
errors due to race conditions.
"""
try:
os.unlink(self._pidfile)
except OSError as exc:
# In Python 3 this could be changed to FileNotFoundError.
if exc.errno == errno.ENOENT:
return
raise
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't covered by tests


def try_to_lock(self):
"""Try to get a lock on this application. If this method does
not raise an Exception, the application will be locked and we
hold the lock; the lock should be released with unlock().

If the file has unexpected contents, for safety we treat the
application as locked, since it is probably the result of
manual meddling, intentional or otherwise.

:keyword _recursive: True if we are being called recursively
and should not try to clean up the lockfile
again.
:raises ApplicationLockedError: the application is locked
"""
if self._try_create():
self._locked = True
return
if _recursive:
raise ApplicationLockedError("Cannot lock %s" % self._name)

with open(self._pidfile, "r") as f:
file_contents = f.read()
try:
pid = int(file_contents.rstrip())
except ValueError:
raise ApplicationLockedError("Lock file %s is corrupt" % self._pidfile)

if self._pid_exists(pid):
raise ApplicationLockedError("%s locked by process %d" % (self._pidfile, pid))
# The lock file was created by a process that has exited;
# remove it and try again.
loggerinst.info("Cleaning up lock held by exited process %d." % pid)
os.unlink(self._pidfile)
self.try_to_lock(_recursive=True)
with open(self._pidfile, "r") as filep:
fcntl.flock(filep, fcntl.LOCK_EX)
jochapma marked this conversation as resolved.
Show resolved Hide resolved
try:
file_contents = filep.read()
pid = int(file_contents.rstrip())
if pid == self._pid:
return
if self._pid_exists(pid):
raise ApplicationLockedError("%s locked by process %d" % (self._pidfile, pid))
# The lock file was created by a process that has exited;
# remove it and try again.
loggerinst.info("Cleaning up lock held by exited process %d." % pid)
self._safe_unlink()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we not reuse the logic of is_locked()? Otherwise we might change it in the future on one part and miss the other etc.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was unable to find a way to avoid a race condition in unlink without keeping a file pointer open across multiple methods, which all my reviewers didn't like. So I removed the automatic cleanup; this also has the benefit of simplifying everything considerably, a good idea since this is my last week.

What this does is add another "can't happen" case (if the lock file somehow doesn't get cleaned up), but that can be easily remedied manually.

In the course of doing that, I addressed the is_locked() reuse.

except ValueError:
raise ApplicationLockedError("%s has invalid contents" % (self._pidfile))
finally:
fcntl.flock(filep, fcntl.LOCK_UN)
jochapma marked this conversation as resolved.
Show resolved Hide resolved

if not self._try_create():
# Between the unlink and our attempt to create the lock
# file, another process got there first.
raise ApplicationLockedError("%s is locked" % self._pidfile)

def unlock(self):
"""Release the lock on this application.

Note that if the unlink fails (a pathological failure) the
object will stay locked and the OSError or other
Note that if the safe unlink fails (a pathological failure)
the object will stay locked and the OSError or other
system-generated exception will be raised.
"""
if not self._locked:
if not self.is_locked:
return
os.unlink(self._pidfile)
self._locked = False
self._safe_unlink()
loggerinst.debug("%s." % self)

def __enter__(self):
Expand Down
13 changes: 11 additions & 2 deletions convert2rhel/unit_tests/applock_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,22 @@ def test_applock_basic(tmp_lock):

def test_applock_basic_islocked(tmp_lock):
with open(tmp_lock._pidfile, "w") as f:
pid = os.getpid()
f.write(str(pid) + "\n")
# Our parent process will be running and have a different pid
ppid = os.getppid()
f.write(str(ppid) + "\n")
with pytest.raises(applock.ApplicationLockedError):
tmp_lock.try_to_lock()
os.unlink(tmp_lock._pidfile)


def test_applock_basic_lock_idempotent(tmp_lock):
with open(tmp_lock._pidfile, "w") as f:
pid = os.getpid()
f.write(str(pid) + "\n")
tmp_lock.try_to_lock()
os.unlink(tmp_lock._pidfile)


def test_applock_basic_reap(tmp_lock):
"""Test the case where the lockfile was held by a process
that has exited."""
Expand Down
Loading