Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Small updates on Slurm config and some linter small rollbacks #150

Merged
merged 11 commits into from
Mar 15, 2024
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ terraform.tfstate.backup
# Environment/key files
.envrc

# VSCode
.vscode

# ssh
id_rsa
id_rsa.pub
Expand All @@ -21,4 +24,4 @@ inventory

# nix
.direnv
shell.nix
shell.nix
1 change: 0 additions & 1 deletion roles/cifs/handlers/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,5 @@
name: "{{ item }}"
enabled: "yes"
state: restarted
cmd: ""
with_items:
- smbd
1 change: 0 additions & 1 deletion roles/dask_gateway/handlers/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,5 @@
name: "{{ item }}"
enabled: "yes"
state: restarted
cmd: ""
with_items:
- dask-gateway
4 changes: 2 additions & 2 deletions roles/dask_gateway/tasks/dask_gateway.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
owner: dask
group: dask
mode: "0644"
notify: restart services dask-gateway
notify: Restart services dask-gateway

- name: Copy the dask-gateway systemd service file
become: true
Expand Down Expand Up @@ -77,7 +77,7 @@
owner: root
group: root
mode: "0644"
notify: restart services dask-gateway
notify: Restart services dask-gateway

- name: Ensure dask-gateway is enabled on boot
become: true
Expand Down
10 changes: 3 additions & 7 deletions roles/dask_gateway/templates/environments/dask-gateway.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,6 @@ name: dask-gateway
channels:
- conda-forge
dependencies:
# had to install dask-gateway-server via pip due to
# conda-forge install issue
# https://github.com/dask/dask-gateway/issues/366
- pip
- pip:
- dask-gateway-server
- sqlalchemy
- python
- dask-gateway-server-jobqueue
- sqlalchemy
3 changes: 0 additions & 3 deletions roles/jupyterhub/handlers/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
name: "{{ item }}"
enabled: "yes"
state: restarted
cmd: ""
with_items:
- jupyterhub

Expand All @@ -15,7 +14,6 @@
name: "{{ item }}"
enabled: "yes"
state: restarted
cmd: ""
with_items:
- jupyterhub-proxy

Expand All @@ -25,6 +23,5 @@
name: "{{ item }}"
enabled: "yes"
state: restarted
cmd: ""
with_items:
- jupyterhub-ssh
1 change: 0 additions & 1 deletion roles/mysql/handlers/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,5 @@
name: "{{ item }}"
enabled: "yes"
state: restarted
cmd: ""
with_items:
- mysql
2 changes: 1 addition & 1 deletion roles/mysql/tasks/mysql.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
mode: "0644"
backup: true
with_dict: "{{ mysql_config }}"
notify: restart services mysql
notify: Restart services mysql

- name: Create mysql database
become: true
Expand Down
1 change: 0 additions & 1 deletion roles/openldap/handlers/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
name: "{{ item }}"
enabled: "yes"
state: restarted
cmd: ""
with_items:
- nscd
- nslcd
8 changes: 4 additions & 4 deletions roles/openldap/tasks/client.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
regexp: pam_mkhomedir\.so
line: session required pam_mkhomedir.so skel=/etc/skel/ umask=0022
state: present
notify: restart services ldap
notify: Restart services ldap

- name: LDAP Authentication | Query ldap in nsswitch.conf
become: true
Expand All @@ -28,7 +28,7 @@
- passwd
- shadow
- group
notify: restart services ldap
notify: Restart services ldap

- name: LDAP Authentication | no cache for ldap in nscd.conf
become: true
Expand All @@ -40,12 +40,12 @@
with_items:
- passwd
- group
notify: restart services ldap
notify: Restart services ldap

- name: LDAP Authentication | Configure /etc/nslcd.conf
become: true
ansible.builtin.template:
src: nslcd.conf.j2
dest: /etc/nslcd.conf
mode: "0600"
notify: restart services ldap
notify: Restart services ldap
1 change: 1 addition & 0 deletions roles/openldap/tasks/openldap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
- name: Load ddbb template into ldap
become: true
ansible.builtin.command: ldapmodify -Y EXTERNAL -H ldapi:/// -f /tmp/db.ldif

- name: Load ldap root entry
community.general.ldap_entry:
server_uri: "{{ openldap_server_uri }}"
Expand Down
1 change: 0 additions & 1 deletion roles/postgresql/handlers/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,5 @@
name: "{{ item }}"
enabled: "yes"
state: restarted
cmd: ""
with_items:
- postgresql
2 changes: 2 additions & 0 deletions roles/slurm/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ slurmd_enabled: false
slurmctld_enabled: false
slurmdbd_enabled: false

SlurmConfigFileDIr: /etc/slurm-llnl

slurm_config:
ClusterName: cluster
# slurmctld options
Expand Down
8 changes: 4 additions & 4 deletions roles/slurm/tasks/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
- name: Ensure that slurm configuration directory exists
become: true
ansible.builtin.file:
path: /etc/slurm
path: "{{ SlurmConfigFileDIr }}"
state: directory
mode: "0755"
owner: root
Expand All @@ -25,10 +25,10 @@
become: true
ansible.builtin.template:
src: templates/slurm.conf
dest: /etc/slurm/slurm.conf
dest: "{{ SlurmConfigFileDIr }}/slurm.conf"
owner: root
group: root
mode: "0444"
mode: "0755"
register: _slurm_config

- name: Install extra execution host configs
Expand All @@ -39,7 +39,7 @@
ConstrainCores=yes
ConstrainRAMSpace=yes
ConstrainSwapSpace=yes
dest: /etc/slurm/cgroup.conf
dest: "{{ SlurmConfigFileDIr }}/cgroup.conf"
owner: root
group: root
mode: "0444"
Expand Down
1 change: 1 addition & 0 deletions roles/slurm/tasks/slurm_exporter.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
---
- name: Install golang
ansible.builtin.include_tasks: golang.yaml

- name: Check that the slurm exporter binary exists
ansible.builtin.stat:
path: /usr/local/bin/prometheus_slurm_exporter
Expand Down
12 changes: 9 additions & 3 deletions roles/slurm/tasks/slurmctld.yaml
Original file line number Diff line number Diff line change
@@ -1,22 +1,28 @@
---
# Must be writable by user SlurmUser.
# The directory must be accessible by the primary and backup control machines.
- name: Ensure slurm state directory exists
become: true
ansible.builtin.file:
path: "{{ slurm_config.StateSaveLocation }}"
state: directory
mode: "0700"
mode: "0755"
owner: slurm
group: slurm

# Must be writable by user SlurmUser.
# The file must be accessible by the primary and backup control machines.
- name: Ensure slurm log directory exists
become: true
ansible.builtin.file:
path: "{{ slurm_config.SlurmctldLogFile | dirname }}"
state: directory
mode: "0700"
mode: "0755"
owner: slurm
group: slurm

# Must be writable by user root. Preferably writable and removable by SlurmUser.
# The file must be accessible by the primary and backup control machines.
- name: Ensure slurm pid directory exists
become: true
ansible.builtin.file:
Expand All @@ -33,7 +39,7 @@
[Unit]
Description=Slurm controller daemon
After=network.target munge.service
ConditionPathExists=/etc/slurm/slurm.conf
ConditionPathExists={{ SlurmConfigFileDIr }}/slurm.conf

[Service]
Type=forking
Expand Down
8 changes: 7 additions & 1 deletion roles/slurm/tasks/slurmd.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
---
# Permissions must be set to 755 so that job scripts can be executed from this directory.
# A distinct directory must exist on each compute node.
- name: Create slurm spool directory
become: true
ansible.builtin.file:
Expand All @@ -8,6 +10,8 @@
mode: "0755"
state: directory

# Must be writable by user root.
# A distinct file must exist on each compute node.
- name: Create slurm log directory
become: true
ansible.builtin.file:
Expand All @@ -17,6 +21,8 @@
mode: "0755"
state: directory

# Must be writable by user root.
# A distinct file must exist on each compute node.
- name: Ensure slurm pid directory exists
become: true
ansible.builtin.file:
Expand All @@ -33,7 +39,7 @@
[Unit]
Description=Slurm node daemon
After=network.target munge.service remote-fs.target
ConditionPathExists=/etc/slurm/slurm.conf
ConditionPathExists={{ SlurmConfigFileDIr }}/slurm.conf

[Service]
Type=forking
Expand Down
12 changes: 9 additions & 3 deletions roles/slurm/tasks/slurmdbd.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
---
# Must be writable by user SlurmUser.
- name: Ensure slurmdbd log directory exists
become: true
ansible.builtin.file:
path: "{{ slurmdbd_config.LogFile | dirname }}"
state: directory
mode: "0700"
mode: "0755"
owner: slurm
group: slurm

# Must be writable by user SlurmUser.
- name: Ensure slurm pid directory exists
become: true
ansible.builtin.file:
Expand All @@ -17,11 +19,15 @@
owner: slurm
group: slurm

# This file should exist only on the computer where SlurmDBD executes
# and should be readable only by the user that executes SlurmDBD (e.g. "slurm").
# This file should be protected from unauthorized access since
# it contains a database password.
- name: Install slurmdbd.conf
become: true
ansible.builtin.template:
src: templates/slurmdbd.conf
dest: /etc/slurm/slurmdbd.conf
dest: "{{ SlurmConfigFileDIr }}/slurmdbd.conf"
owner: slurm
group: slurm
mode: "0600"
Expand All @@ -34,7 +40,7 @@
[Unit]
Description=Slurm DBD accounting daemon
After=network.target munge.service
ConditionPathExists=/etc/slurm/slurmdbd.conf
ConditionPathExists={{ SlurmConfigFileDIr }}/slurmdbd.conf

[Service]
Type=forking
Expand Down
1 change: 0 additions & 1 deletion roles/traefik/handlers/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,5 @@
name: "{{ item }}"
enabled: "yes"
state: restarted
cmd: ""
with_items:
- traefik
10 changes: 5 additions & 5 deletions roles/traefik/tasks/traefik.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@
owner: traefik
group: traefik
when: traefik_tls_certificate is defined
notify: restart services traefik
notify: Restart services traefik
register: _traefik_tls_certificate

- name: Copy TLS key if provided
Expand All @@ -102,7 +102,7 @@
owner: traefik
group: traefik
when: traefik_tls_key is defined
notify: restart services traefik
notify: Restart services traefik
register: _traefik_tls_key

- name: Copy traefik configuration
Expand All @@ -113,7 +113,7 @@
mode: "0600"
owner: traefik
group: traefik
notify: restart services traefik
notify: Restart services traefik

- name: Copy traefik dynamic configuration
become: true
Expand All @@ -123,7 +123,7 @@
mode: "0600"
owner: traefik
group: traefik
notify: restart services traefik
notify: Restart services traefik

- name: Copy the traefik systemd service file
become: true
Expand Down Expand Up @@ -155,7 +155,7 @@
owner: root
group: root
mode: "0644"
notify: restart services traefik
notify: Restart services traefik

- name: Ensure Traefik is enabled on boot
become: true
Expand Down
Loading