update collages in cache
azuline committed Oct 17, 2023
1 parent 4d2af17 commit 8ede210
Showing 5 changed files with 256 additions and 77 deletions.
6 changes: 3 additions & 3 deletions rose/__main__.py
@@ -5,7 +5,7 @@

import click

-from rose.cache import migrate_database, update_cache_for_all_releases
+from rose.cache import migrate_database, update_cache
from rose.config import Config
from rose.print import print_releases
from rose.virtualfs import mount_virtualfs, unmount_virtualfs
@@ -47,7 +47,7 @@ def cache() -> None:
# fmt: on
def update(ctx: Context, force: bool) -> None:
"""Update the read cache from disk data."""
update_cache_for_all_releases(ctx.config, force)
update_cache(ctx.config, force)


@cli.group()
@@ -61,7 +61,7 @@ def fs() -> None:
def mount(ctx: Context, foreground: bool) -> None:
"""Mount the virtual library."""
# Trigger a cache refresh in the background when we first mount the filesystem.
p = Process(target=update_cache_for_all_releases, args=[ctx.config, False])
p = Process(target=update_cache, args=[ctx.config, False])
try:
p.start()
mount_virtualfs(ctx.config, foreground)
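The mount hunk starts the cache refresh in a separate process so that mounting the filesystem is not blocked on a full rescan. A minimal standalone sketch of that pattern (the function bodies here are stand-ins, not rose's actual update_cache and mount_virtualfs):

```python
from multiprocessing import Process
import time


def refresh_cache(force: bool) -> None:
    # Stand-in for update_cache(config, force): pretend to do slow work.
    time.sleep(0.1)
    print(f"cache refreshed (force={force})")


if __name__ == "__main__":
    p = Process(target=refresh_cache, args=[False])
    try:
        p.start()
        print("virtual filesystem mounted")  # stand-in for mount_virtualfs()
    finally:
        p.join()
```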
184 changes: 161 additions & 23 deletions rose/cache.py
@@ -8,6 +8,7 @@
from contextlib import contextmanager
from dataclasses import asdict, dataclass
from pathlib import Path
+from typing import Any

import tomli_w
import tomllib
@@ -115,6 +116,20 @@ class CachedTrack:
    formatted_artists: str


+@dataclass
+class CachedCollage:
+    name: str
+    source_mtime: str
+    release_ids: list[str]
+
+
+@dataclass
+class CachedPlaylist:
+    name: str
+    source_mtime: str
+    track_ids: list[str]
+
+
@dataclass
class StoredDataFile:
    new: bool
@@ -149,13 +164,14 @@ class StoredDataFile:
STORED_DATA_FILE_REGEX = re.compile(r"\.rose\.([^.]+)\.toml")


-def update_cache_for_all_releases(c: Config, force: bool = False) -> None:
+def update_cache(c: Config, force: bool = False) -> None:
    """
    Update the read cache to match the data for all releases in the music source directory. Delete
    any cached releases that are no longer present on disk.
    """
    dirs = [Path(d.path).resolve() for d in os.scandir(c.music_source_dir) if d.is_dir()]
    update_cache_for_releases(c, dirs, force)
+    update_cache_for_collages(c, force)
    update_cache_delete_nonexistent_releases(c)


@@ -220,21 +236,21 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path], force: bool =
rf"""
WITH genres AS (
SELECT
release_id,
GROUP_CONCAT(genre, ' \\ ') AS genres
release_id
, GROUP_CONCAT(genre, ' \\ ') AS genres
FROM releases_genres
GROUP BY release_id
), labels AS (
SELECT
release_id,
GROUP_CONCAT(label, ' \\ ') AS labels
release_id
, GROUP_CONCAT(label, ' \\ ') AS labels
FROM releases_labels
GROUP BY release_id
), artists AS (
SELECT
release_id,
GROUP_CONCAT(artist, ' \\ ') AS names,
GROUP_CONCAT(role, ' \\ ') AS roles
release_id
, GROUP_CONCAT(artist, ' \\ ') AS names
, GROUP_CONCAT(role, ' \\ ') AS roles
FROM releases_artists
GROUP BY release_id
)
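These CTEs denormalize the one-to-many genre/label/artist tables into a single row per release. A self-contained demonstration of the GROUP_CONCAT round-trip they rely on, using an in-memory database (the table contents are made up):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE releases_genres (release_id TEXT, genre TEXT)")
conn.executemany(
    "INSERT INTO releases_genres VALUES (?, ?)",
    [("r1", "House"), ("r1", "Techno"), ("r2", "Ambient")],
)
cursor = conn.execute(
    r"""
    SELECT release_id, GROUP_CONCAT(genre, ' \\ ') AS genres
    FROM releases_genres
    GROUP BY release_id
    """
)
for release_id, genres in cursor:
    # The ' \\ ' separator is split back apart on the Python side.
    print(release_id, genres.split(r" \\ "))
```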
@@ -294,9 +310,9 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path], force: bool =
rf"""
WITH artists AS (
SELECT
track_id,
GROUP_CONCAT(artist, ' \\ ') AS names,
GROUP_CONCAT(role, ' \\ ') AS roles
track_id
, GROUP_CONCAT(artist, ' \\ ') AS names
, GROUP_CONCAT(role, ' \\ ') AS roles
FROM tracks_artists
GROUP BY track_id
)
@@ -476,7 +492,7 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path], force: bool =
        track_mtime = str(os.stat(track_path).st_mtime)
        # Skip re-read if we can reuse a cached entry.
        if cached_track and track_mtime == cached_track.source_mtime and not force:
-            logger.debug(f"Track cache hit (mtime) for {f}, reusing cached data")
+            logger.debug(f"Track cache hit (mtime) for {f.name}, reusing cached data")
            tracks.append(cached_track)
            unknown_cached_tracks.remove(str(track_path))
            continue
@@ -787,7 +803,129 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path], force: bool =
            (track.id, art.name, sanitize_filename(art.name), art.role, art.role),
        )

-    logger.debug(f"Update loop time {time.time() - loop_start=}")
+    logger.debug(f"Release update loop time {time.time() - loop_start=}")


+def update_cache_for_collages(c: Config, force: bool = False) -> None:
+    """
+    Update the read cache to match the data for all stored collages.
+
+    This is performance-optimized in the same way as update_cache_for_releases. We:
+
+    1. Execute one big SQL query at the start to fetch the relevant previous caches.
+    2. Skip reading a file's data if the mtime has not changed since the previous cache update.
+    3. Only execute a SQLite upsert if the read data differ from the previous caches.
+    """
+    collage_dir = c.music_source_dir / "!collages"
+    collage_dir.mkdir(exist_ok=True)
+
+    files: list[tuple[Path, str, os.DirEntry[str]]] = []
+    for f in os.scandir(str(collage_dir)):
+        path = Path(f.path)
+        if path.suffix != ".toml":
+            continue
+        files.append((path.resolve(), path.stem, f))
+    logger.info(f"Refreshing the read cache for {len(files)} collages")
+
+    cached_collages: dict[str, CachedCollage] = {}
+    with connect(c) as conn:
+        cursor = conn.execute(
+            r"""
+            SELECT
+                c.name
+                , c.source_mtime
+                , COALESCE(GROUP_CONCAT(cr.release_id, ' \\ '), '') AS release_ids
+            FROM collages c
+            LEFT JOIN collages_releases cr ON cr.collage_name = c.name
+            GROUP BY c.name
+            """,
+        )
+        for row in cursor:
+            cached_collages[row["name"]] = CachedCollage(
+                name=row["name"],
+                source_mtime=row["source_mtime"],
+                release_ids=row["release_ids"].split(r" \\ ") if row["release_ids"] else [],
+            )
+
+        # We want to validate that all release IDs exist before we write them. In order to do
+        # that, we need to know which releases exist.
+        cursor = conn.execute("SELECT id FROM releases")
+        existing_release_ids = {row["id"] for row in cursor}
+
+    loop_start = time.time()
+    with connect(c) as conn:
+        for source_path, name, f in files:
+            try:
+                cached_collage = cached_collages[name]
+            except KeyError:
+                logger.debug(f"First-time unidentified collage found at {source_path}")
+                cached_collage = CachedCollage(
+                    name=name,
+                    source_mtime="",
+                    release_ids=[],
+                )
+
+            source_mtime = str(f.stat().st_mtime)
+            if source_mtime == cached_collage.source_mtime and not force:
+                logger.debug(f"Collage cache hit (mtime) for {source_path}, reusing cached data")
+                continue
+
+            logger.debug(f"Collage cache miss (mtime) for {source_path}, reading data from disk")
+            cached_collage.source_mtime = source_mtime
+
+            with source_path.open("rb") as fp:
+                diskdata = tomllib.load(fp)
+
+            # Rebuild the release list from disk; the list loaded from the previous cache may be
+            # stale. Track the listed releases that no longer exist, and remove them from the
+            # collage file after.
+            cached_collage.release_ids = []
+            nonexistent_release_idxs: list[int] = []
+            for idx, rls in enumerate(diskdata.get("releases", [])):
+                if rls["uuid"] not in existing_release_ids:
+                    nonexistent_release_idxs.append(idx)
+                    continue
+                cached_collage.release_ids.append(rls["uuid"])
+
+            conn.execute(
+                """
+                INSERT INTO collages (name, source_mtime) VALUES (?, ?)
+                ON CONFLICT (name) DO UPDATE SET source_mtime = ?
+                """,
+                (cached_collage.name, cached_collage.source_mtime, cached_collage.source_mtime),
+            )
+            conn.execute(
+                "DELETE FROM collages_releases WHERE collage_name = ?",
+                (cached_collage.name,),
+            )
+            args: list[Any] = []
+            for position, rid in enumerate(cached_collage.release_ids):
+                args.extend([cached_collage.name, rid, position])
+            if args:
+                conn.execute(
+                    f"""
+                    INSERT INTO collages_releases (collage_name, release_id, position)
+                    VALUES {','.join(['(?, ?, ?)'] * len(cached_collage.release_ids))}
+                    """,
+                    args,
+                )
+
+            logger.info(f"Applying cache updates for collage {cached_collage.name}")
+
+            if nonexistent_release_idxs:
+                new_diskdata_releases: list[dict[str, str]] = []
+                removed_releases: list[str] = []
+                for idx, rls in enumerate(diskdata.get("releases", [])):
+                    if idx in nonexistent_release_idxs:
+                        removed_releases.append(rls["description_meta"])
+                        continue
+                    new_diskdata_releases.append(rls)
+
+                with source_path.open("wb") as fp:
+                    tomli_w.dump({"releases": new_diskdata_releases}, fp)
+
+                logger.info(
+                    f"Removing nonexistent releases from collage {cached_collage.name}: "
+                    f"{','.join(removed_releases)}"
+                )
+
+    logger.debug(f"Collage update loop time {time.time() - loop_start=}")

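Steps 1 and 3 revolve around the upsert-plus-batched-insert in the loop above. The same pattern against a pared-down schema (only the two collage tables, with the foreign keys omitted; all values are made up):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript(
    """
    CREATE TABLE collages (name TEXT PRIMARY KEY, source_mtime TEXT NOT NULL);
    CREATE TABLE collages_releases (collage_name TEXT, release_id TEXT, position INTEGER NOT NULL);
    """
)

name, mtime = "Mixtape", "1697500000.0"
# Upsert: create the collage, or just refresh its mtime if it already exists.
conn.execute(
    "INSERT INTO collages (name, source_mtime) VALUES (?, ?) "
    "ON CONFLICT (name) DO UPDATE SET source_mtime = ?",
    (name, mtime, mtime),
)

# Replace the membership rows with one multi-row INSERT built from flat args.
release_ids = ["r1", "r2", "r3"]
conn.execute("DELETE FROM collages_releases WHERE collage_name = ?", (name,))
args: list[object] = []
for position, rid in enumerate(release_ids):
    args.extend([name, rid, position])
if args:
    conn.execute(
        "INSERT INTO collages_releases (collage_name, release_id, position) "
        f"VALUES {','.join(['(?, ?, ?)'] * len(release_ids))}",
        args,
    )
print(conn.execute("SELECT * FROM collages_releases ORDER BY position").fetchall())
```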

def list_releases(
@@ -800,21 +938,21 @@ def list_releases(
    query = r"""
    WITH genres AS (
        SELECT
-            release_id,
-            GROUP_CONCAT(genre, ' \\ ') AS genres
+            release_id
+            , GROUP_CONCAT(genre, ' \\ ') AS genres
        FROM releases_genres
        GROUP BY release_id
    ), labels AS (
        SELECT
-            release_id,
-            GROUP_CONCAT(label, ' \\ ') AS labels
+            release_id
+            , GROUP_CONCAT(label, ' \\ ') AS labels
        FROM releases_labels
        GROUP BY release_id
    ), artists AS (
        SELECT
-            release_id,
-            GROUP_CONCAT(artist, ' \\ ') AS names,
-            GROUP_CONCAT(role, ' \\ ') AS roles
+            release_id
+            , GROUP_CONCAT(artist, ' \\ ') AS names
+            , GROUP_CONCAT(role, ' \\ ') AS roles
        FROM releases_artists
        GROUP BY release_id
    )
@@ -904,9 +1042,9 @@ def get_release_files(c: Config, release_virtual_dirname: str) -> ReleaseFiles:
r"""
WITH artists AS (
SELECT
track_id,
GROUP_CONCAT(artist, ' \\ ') AS names,
GROUP_CONCAT(role, ' \\ ') AS roles
track_id
, GROUP_CONCAT(artist, ' \\ ') AS names
, GROUP_CONCAT(role, ' \\ ') AS roles
FROM tracks_artists
GROUP BY track_id
)
28 changes: 11 additions & 17 deletions rose/cache.sql
@@ -100,36 +100,30 @@ CREATE INDEX tracks_artists_track_id ON tracks_artists(track_id);
CREATE INDEX tracks_artists_artist ON tracks_artists(artist);
CREATE INDEX tracks_artists_artist_sanitized ON tracks_artists(artist_sanitized);

-CREATE TABLE collections (
-    id TEXT PRIMARY KEY,
-    name TEXT NOT NULL,
-    source_path TEXT UNIQUE NOT NULL,
+CREATE TABLE collages (
+    name TEXT PRIMARY KEY,
    source_mtime TEXT NOT NULL
);
-CREATE INDEX collections_source_path ON collections(source_path);

-CREATE TABLE collections_releases (
-    collection_id TEXT REFERENCES collections(id) ON DELETE CASCADE,
+CREATE TABLE collages_releases (
+    collage_name TEXT REFERENCES collages(name) ON DELETE CASCADE,
    release_id TEXT REFERENCES releases(id) ON DELETE CASCADE,
    position INTEGER NOT NULL
);
-CREATE INDEX collections_releases_collection_id ON collections_releases(collection_id);
-CREATE INDEX collections_releases_release_id ON collections_releases(release_id);
-CREATE UNIQUE INDEX collections_releases_collection_position ON collections_releases(collection_id, position);
+CREATE INDEX collages_releases_collage_name ON collages_releases(collage_name);
+CREATE INDEX collages_releases_release_id ON collages_releases(release_id);
+CREATE UNIQUE INDEX collages_releases_collage_position ON collages_releases(collage_name, position);

CREATE TABLE playlists (
-    id TEXT PRIMARY KEY,
-    name TEXT NOT NULL,
-    source_path TEXT UNIQUE NOT NULL,
+    name TEXT PRIMARY KEY,
    source_mtime TEXT NOT NULL
);
-CREATE INDEX playlists_source_path ON playlists(source_path);

CREATE TABLE playlists_tracks (
-    playlist_id TEXT REFERENCES playlists(id) ON DELETE CASCADE,
+    playlist_name TEXT REFERENCES playlists(name) ON DELETE CASCADE,
    track_id TEXT REFERENCES tracks(id) ON DELETE CASCADE,
    position INTEGER NOT NULL
);
-CREATE INDEX playlists_tracks_playlist_id ON playlists_tracks(playlist_id);
+CREATE INDEX playlists_tracks_playlist_name ON playlists_tracks(playlist_name);
CREATE INDEX playlists_tracks_track_id ON playlists_tracks(track_id);
-CREATE UNIQUE INDEX playlists_tracks_playlist_position ON playlists_tracks(playlist_id, position);
+CREATE UNIQUE INDEX playlists_tracks_playlist_position ON playlists_tracks(playlist_name, position);
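The collage and playlist tables are now keyed by name, and the membership tables cascade on delete. A quick check of that cascade with sqlite3 (the releases/tracks foreign keys are dropped here for brevity, and note that SQLite only enforces cascades when foreign keys are enabled on the connection):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("PRAGMA foreign_keys = ON")
conn.executescript(
    """
    CREATE TABLE collages (name TEXT PRIMARY KEY, source_mtime TEXT NOT NULL);
    CREATE TABLE collages_releases (
        collage_name TEXT REFERENCES collages(name) ON DELETE CASCADE,
        release_id TEXT,
        position INTEGER NOT NULL
    );
    """
)
conn.execute("INSERT INTO collages VALUES ('Mixtape', '0')")
conn.execute("INSERT INTO collages_releases VALUES ('Mixtape', 'r1', 0)")
conn.execute("DELETE FROM collages WHERE name = 'Mixtape'")
# The membership row is gone too: prints (0,).
print(conn.execute("SELECT COUNT(*) FROM collages_releases").fetchone())
```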