Skip to content
This repository has been archived by the owner on Aug 9, 2024. It is now read-only.

feat!: overhaul slurmctld API part 2 #27

Merged
merged 1 commit on Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,4 @@ __pycache__/
.idea
.vscode/
version
.mypy_cache
.ruff_cache
2 changes: 0 additions & 2 deletions charmcraft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ requires:
interface: slurmdbd
slurmrestd:
interface: slurmrestd
fluentbit:
interface: fluentbit

assumes:
- juju
Expand Down
203 changes: 0 additions & 203 deletions lib/charms/fluentbit/v0/fluentbit.py

This file was deleted.

17 changes: 1 addition & 16 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
import subprocess
from typing import Any, Dict, List, Optional, Union

from charms.fluentbit.v0.fluentbit import FluentbitClient
from constants import CHARM_MAINTAINED_SLURM_CONF_PARAMETERS, FLUENTBIT_CONFIG, SLURM_CONF_PATH
from constants import CHARM_MAINTAINED_SLURM_CONF_PARAMETERS, SLURM_CONF_PATH
from interface_slurmd import (
PartitionAvailableEvent,
PartitionUnavailableEvent,
Expand All @@ -34,7 +33,6 @@
CharmBase,
ConfigChangedEvent,
InstallEvent,
RelationCreatedEvent,
StoredState,
UpdateStatusEvent,
WaitingStatus,
Expand Down Expand Up @@ -68,29 +66,21 @@ def __init__(self, *args):

self._slurmctld_manager = SlurmctldManager()

self._fluentbit = FluentbitClient(self, "fluentbit")
self._slurmd = Slurmd(self, "slurmd")
self._slurmdbd = Slurmdbd(self, "slurmdbd")
self._slurmrestd = Slurmrestd(self, "slurmrestd")

event_handler_bindings = {
# Charm lifecycle hook events
self.on.install: self._on_install,
self.on.update_status: self._on_update_status,
self.on.config_changed: self._on_config_changed,
# slurmdbd lifecycle hook events
self._slurmdbd.on.slurmdbd_available: self._on_slurmdbd_available,
self._slurmdbd.on.slurmdbd_unavailable: self._on_slurmdbd_unavailable,
# slurmd lifecycle hook events
self._slurmd.on.partition_available: self._on_write_slurm_conf,
self._slurmd.on.partition_unavailable: self._on_write_slurm_conf,
self._slurmd.on.slurmd_available: self._on_write_slurm_conf,
self._slurmd.on.slurmd_departed: self._on_write_slurm_conf,
# slurmrestd available
self._slurmrestd.on.slurmrestd_available: self._on_slurmrestd_available,
# fluentbit
self.on["fluentbit"].relation_created: self._on_fluentbit_relation_created,
# actions
self.on.show_current_config_action: self._on_show_current_config_action,
self.on.drain_action: self._on_drain_nodes_action,
self.on.resume_action: self._on_resume_nodes_action,
Expand Down Expand Up @@ -173,11 +163,6 @@ def _on_show_current_config_action(self, event: ActionEvent) -> None:
slurm_conf = SLURM_CONF_PATH.read_text()
event.set_results({"slurm.conf": slurm_conf})

def _on_fluentbit_relation_created(self, event: RelationCreatedEvent) -> None:
"""Set up Fluentbit log forwarding."""
logger.debug("## Configuring fluentbit")
self._fluentbit.configure(FLUENTBIT_CONFIG)

def _on_slurmrestd_available(self, event: SlurmrestdAvailableEvent) -> None:
"""Check that we have slurm_config when slurmrestd available otherwise defer the event."""
if self.model.unit.is_leader():
Expand Down
33 changes: 1 addition & 32 deletions src/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
SLURM_GROUP = "slurm"

CHARM_MAINTAINED_SLURM_CONF_PARAMETERS = {
"AuthAltParameters": "jwt_key=/var/spool/slurmctldjwt_hs256.key",
"AuthAltParameters": "jwt_key=/var/spool/slurmctld/jwt_hs256.key",
"AuthAltTypes": "auth/jwt",
"AuthInfo": "/var/run/munge/munge.socket.2",
"AuthType": "auth/munge",
Expand All @@ -34,37 +34,6 @@
"RebootProgram": '"/usr/sbin/reboot --reboot"',
}


FLUENTBIT_CONFIG = [
{
"input": [
("name", "tail"),
("path", "/var/log/slurm/slurmctld.log"),
("path_key", "filename"),
("tag", "slurmctld"),
("parser", "slurm"),
]
},
{
"parser": [
("name", "slurm"),
("format", "regex"),
("regex", r"^\[(?<time>[^\]]*)\] (?<log>.*)$"),
("time_key", "time"),
("time_format", "%Y-%m-%dT%H:%M:%S.%L"),
]
},
{
"filter": [
("name", "record_modifier"),
("match", "slurmctld"),
("record", "hostname ${HOSTNAME}"),
("record", "service slurmctld"),
]
},
]


UBUNTU_HPC_PPA_KEY = """
-----BEGIN PGP PUBLIC KEY BLOCK-----
Comment: Hostname:
Expand Down
7 changes: 7 additions & 0 deletions src/slurmctld_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,13 @@ def install(self) -> bool:
return False
systemd.service_stop("munge")

spool_dir = Path("/var/spool/slurmctld")
if not spool_dir.exists():
spool_dir.mkdir()

slurm_user_uid, slurm_group_gid = _get_slurm_user_uid_and_slurm_group_gid()
os.chown(f"{spool_dir}", slurm_user_uid, slurm_group_gid)

return True

def version(self) -> str:
Expand Down
Loading