Skip to content

Commit

Permalink
pglookout: support explicit failover priorities
Browse files Browse the repository at this point in the history
Support explicit prioritization between instances. This can be
configured via ``failover_priorities`` key, and will be consulted
when picking the standby that should perform the promotion in cases
where multiple nodes have a matching replication position.

Previously, and also as the current default, the selection was based
on the sorting order of the remote nodes.

The configuration option allows some additional flexibility, and
supports e.g. topologies where we have more favorable and less
desirable standbys in multiple different network locations.
  • Loading branch information
hnousiainen committed Nov 25, 2024
1 parent 60f65b2 commit 1b86c58
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 8 deletions.
8 changes: 8 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,14 @@ over_warning_limit_command and to create a warning file.

Shell command to execute in case the node has deemed itself in need of promotion

``failover_priorities`` (default ``{}``)

Define priority of nodes for promotion, in case there are multiple candidates
with the same replication position. This ensures that all pglookout instances
elect the same standby for promotion, while still allowing for topologies
with e.g. less preferred standbys in secondary network locations. By default,
pglookout uses remote connection ids for the same selection purpose.

``known_gone_nodes`` (default ``[]``)

Lists nodes that are explicitly known to have left the cluster. If the old
Expand Down
23 changes: 15 additions & 8 deletions pglookout/pglookout.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,14 +643,21 @@ def do_failover_decision(self, standby_nodes):
if not known_replication_positions:
self.log.warning("No known replication positions, canceling failover consideration")
return
# If there are multiple nodes with the same replication positions pick the one with the "highest" name
# to make sure pglookouts running on all standbys make the same decision. The rationale for picking
# the "highest" node is that there's no obvious way for pglookout to decide which of the nodes is
# "best" beyond looking at replication positions, but picking the highest id supports environments
# where nodes are assigned identifiers from an incrementing sequence identifiers and where we want to
# promote the latest and greatest node. In static environments node identifiers can be priority
# numbers, with the highest number being the one that should be preferred.
furthest_along_instance = max(known_replication_positions[max(known_replication_positions)])

# Find the instance that is furthest along.
# If there are multiple nodes with the same replication positions, try to identify one to promote either
# via explicit failover priority configuration or pick the one with the "highest" name.
# The rationale of this logic is to ensure all participating pglookouts running on all standbys make
# the same decision. The "highest" name works well in environments where nodes are assigned identifiers
# from an incrementing sequence and where we want to promote the latest and greatest node.
def _priority_or_id(instance):
priority = self.config.get("failover_priorities", {}).get(instance)
if priority is None:
priority = instance
return priority

furthest_along_instances = known_replication_positions[max(known_replication_positions)]
furthest_along_instance = sorted(furthest_along_instances, key=_priority_or_id, reverse=True)[0]
self.log.warning(
"Node that is furthest along is: %r, all replication positions were: %r",
furthest_along_instance,
Expand Down
53 changes: 53 additions & 0 deletions test/test_lookout.py
Original file line number Diff line number Diff line change
Expand Up @@ -1005,6 +1005,59 @@ def test_standbys_failover_equal_replication_positions(pgl):
assert pgl.execute_external_command.call_count == 1


def test_standbys_failover_equal_replication_positions_with_priorities(pgl):
    """Verify that ``failover_priorities`` overrides the default instance-name ordering.

    Two standbys share the same replication position; the one with the higher
    configured priority must be the one that promotes itself, even though the
    other sorts higher by instance name.
    """
    now = datetime.datetime.utcnow()
    an_hour_ago = now - datetime.timedelta(seconds=3600)
    # Per-instance state: two healthy standbys at the same xlog position, plus
    # the (dead) current master. Insertion order matches the original setup order.
    node_states = {
        "192.168.54.183": dict(
            pg_last_xlog_receive_location="0/70004D8",
            pg_is_in_recovery=True,
            connection=True,
            replication_time_lag=400.435871,
            fetch_time=now,
            db_time=now,
        ),
        "192.168.57.180": dict(
            pg_last_xlog_receive_location=None,
            pg_is_in_recovery=False,
            connection=False,
            replication_time_lag=0.0,
            fetch_time=an_hour_ago,
            db_time=an_hour_ago,
        ),
        "192.168.63.4": dict(
            pg_last_xlog_receive_location="0/70004D8",
            pg_is_in_recovery=True,
            connection=True,
            replication_time_lag=401.104655,
            fetch_time=now,
            db_time=now,
        ),
    }
    for instance, state in node_states.items():
        _set_instance_cluster_state(pgl, instance=instance, conn_info="foobar", **state)

    pgl.current_master = "192.168.57.180"

    pgl.config["failover_priorities"] = {
        "192.168.54.183": 1000,
        "192.168.63.4": 0,
    }

    # Highest by instance name, but explicitly de-prioritized: must not promote itself
    pgl.own_db = "192.168.63.4"
    pgl.check_cluster_state()
    assert pgl.execute_external_command.call_count == 0
    # Lower by instance name, but highest priority: promotes itself
    pgl.own_db = "192.168.54.183"
    pgl.check_cluster_state()
    assert pgl.execute_external_command.call_count == 1


def test_node_map_when_only_observer_sees_master(pgl):
cluster_state = {
"10.255.255.10": {
Expand Down

0 comments on commit 1b86c58

Please sign in to comment.