From 8ede2107e5295de261bc431ebb321900352b11ee Mon Sep 17 00:00:00 2001 From: blissful Date: Tue, 17 Oct 2023 10:29:21 -0400 Subject: [PATCH] update collages in cache --- rose/__main__.py | 6 +- rose/cache.py | 184 +++++++++++++++++++++--- rose/cache.sql | 28 ++-- rose/cache_test.py | 112 ++++++++++----- testdata/cache/Collage 1/Rose Gold.toml | 3 + 5 files changed, 256 insertions(+), 77 deletions(-) create mode 100644 testdata/cache/Collage 1/Rose Gold.toml diff --git a/rose/__main__.py b/rose/__main__.py index 009b2b7..69a585d 100644 --- a/rose/__main__.py +++ b/rose/__main__.py @@ -5,7 +5,7 @@ import click -from rose.cache import migrate_database, update_cache_for_all_releases +from rose.cache import migrate_database, update_cache from rose.config import Config from rose.print import print_releases from rose.virtualfs import mount_virtualfs, unmount_virtualfs @@ -47,7 +47,7 @@ def cache() -> None: # fmt: on def update(ctx: Context, force: bool) -> None: """Update the read cache from disk data.""" - update_cache_for_all_releases(ctx.config, force) + update_cache(ctx.config, force) @cli.group() @@ -61,7 +61,7 @@ def fs() -> None: def mount(ctx: Context, foreground: bool) -> None: """Mount the virtual library.""" # Trigger a cache refresh in the background when we first mount the filesystem. - p = Process(target=update_cache_for_all_releases, args=[ctx.config, False]) + p = Process(target=update_cache, args=[ctx.config, False]) try: p.start() mount_virtualfs(ctx.config, foreground) diff --git a/rose/cache.py b/rose/cache.py index 5daac9c..614fc4d 100644 --- a/rose/cache.py +++ b/rose/cache.py @@ -8,6 +8,7 @@ from contextlib import contextmanager from dataclasses import asdict, dataclass from pathlib import Path +from typing import Any import tomli_w import tomllib @@ -115,6 +116,20 @@ class CachedTrack: formatted_artists: str +@dataclass +class CachedCollage: + name: str + source_mtime: str + release_ids: list[str] + + +@dataclass +class CachedPlaylist: + name: str + source_mtime: str + track_ids: list[str] + + @dataclass class StoredDataFile: new: bool @@ -149,13 +164,14 @@ class StoredDataFile: STORED_DATA_FILE_REGEX = re.compile(r"\.rose\.([^.]+)\.toml") -def update_cache_for_all_releases(c: Config, force: bool = False) -> None: +def update_cache(c: Config, force: bool = False) -> None: """ Update the read cache to match the data for all releases in the music source directory. Delete any cached releases that are no longer present on disk. """ dirs = [Path(d.path).resolve() for d in os.scandir(c.music_source_dir) if d.is_dir()] update_cache_for_releases(c, dirs, force) + update_cache_for_collages(c, force) update_cache_delete_nonexistent_releases(c) @@ -220,21 +236,21 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path], force: bool = rf""" WITH genres AS ( SELECT - release_id, - GROUP_CONCAT(genre, ' \\ ') AS genres + release_id + , GROUP_CONCAT(genre, ' \\ ') AS genres FROM releases_genres GROUP BY release_id ), labels AS ( SELECT - release_id, - GROUP_CONCAT(label, ' \\ ') AS labels + release_id + , GROUP_CONCAT(label, ' \\ ') AS labels FROM releases_labels GROUP BY release_id ), artists AS ( SELECT - release_id, - GROUP_CONCAT(artist, ' \\ ') AS names, - GROUP_CONCAT(role, ' \\ ') AS roles + release_id + , GROUP_CONCAT(artist, ' \\ ') AS names + , GROUP_CONCAT(role, ' \\ ') AS roles FROM releases_artists GROUP BY release_id ) @@ -294,9 +310,9 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path], force: bool = rf""" WITH artists AS ( SELECT - track_id, - GROUP_CONCAT(artist, ' \\ ') AS names, - GROUP_CONCAT(role, ' \\ ') AS roles + track_id + , GROUP_CONCAT(artist, ' \\ ') AS names + , GROUP_CONCAT(role, ' \\ ') AS roles FROM tracks_artists GROUP BY track_id ) @@ -476,7 +492,7 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path], force: bool = track_mtime = str(os.stat(track_path).st_mtime) # Skip re-read if we can reuse a cached entry. if cached_track and track_mtime == cached_track.source_mtime and not force: - logger.debug(f"Track cache hit (mtime) for {f}, reusing cached data") + logger.debug(f"Track cache hit (mtime) for {f.name}, reusing cached data") tracks.append(cached_track) unknown_cached_tracks.remove(str(track_path)) continue @@ -787,7 +803,129 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path], force: bool = (track.id, art.name, sanitize_filename(art.name), art.role, art.role), ) - logger.debug(f"Update loop time {time.time() - loop_start=}") + logger.debug(f"Release update loop time {time.time() - loop_start=}") + + +def update_cache_for_collages(c: Config, force: bool = False) -> None: + """ + Update the read cache to match the data for all stored collages. + + This is performance-optimized in the same way as the update releases function. We: + + 1. Execute one big SQL query at the start to fetch the relevant previous caches. + 2. Skip reading a file's data if the mtime has not changed since the previous cache update. + 3. Only execute a SQLite upsert if the read data differ from the previous caches. + """ + collage_dir = c.music_source_dir / "!collages" + collage_dir.mkdir(exist_ok=True) + + files: list[tuple[Path, str, os.DirEntry[str]]] = [] + for f in os.scandir(str(collage_dir)): + path = Path(f.path) + if path.suffix != ".toml": + continue + files.append((path.resolve(), path.stem, f)) + logger.info(f"Refreshing the read cache for {len(files)} collages") + + cached_collages: dict[str, CachedCollage] = {} + with connect(c) as conn: + cursor = conn.execute( + r""" + SELECT + c.name + , c.source_mtime + , COALESCE(GROUP_CONCAT(cr.release_id, ' \\ '), '') AS release_ids + FROM collages c + LEFT JOIN collages_releases cr ON cr.collage_name = c.name + """, + ) + for row in cursor: + cached_collages[row["name"]] = CachedCollage( + name=row["name"], + source_mtime=row["source_mtime"], + release_ids=row["release_ids"].split(r" \\ "), + ) + + # We want to validate that all release IDs exist before we write them. In order to do that, + # we need to know which releases exist. + cursor = conn.execute("SELECT id FROM releases") + existing_release_ids = {row["id"] for row in cursor} + + loop_start = time.time() + with connect(c) as conn: + for source_path, name, f in files: + try: + cached_collage = cached_collages[name] + except KeyError: + logger.debug(f"First-time unidentified collage found at {source_path}") + cached_collage = CachedCollage( + name=name, + source_mtime="", + release_ids=[], + ) + + source_mtime = str(f.stat().st_mtime) + if source_mtime == cached_collage.source_mtime and not force: + logger.debug(f"Collage cache hit (mtime) for {source_path}, reusing cached data") + + logger.debug(f"Collage cache miss (mtime) for {source_path}, reading data from disk") + cached_collage.source_mtime = source_mtime + + with source_path.open("rb") as fp: + diskdata = tomllib.load(fp) + + # Track the listed releases that no longer exist. Remove them from the collage file + # after. + nonexistent_release_idxs: list[int] = [] + for idx, rls in enumerate(diskdata.get("releases", [])): + if rls["uuid"] not in existing_release_ids: + nonexistent_release_idxs.append(idx) + continue + cached_collage.release_ids.append(rls["uuid"]) + + conn.execute( + """ + INSERT INTO collages (name, source_mtime) VALUES (?, ?) + ON CONFLICT (name) DO UPDATE SET source_mtime = ? + """, + (cached_collage.name, cached_collage.source_mtime, cached_collage.source_mtime), + ) + conn.execute( + "DELETE FROM collages_releases WHERE collage_name = ?", + (cached_collage.name,), + ) + args: list[Any] = [] + for position, rid in enumerate(cached_collage.release_ids): + args.extend([cached_collage.name, rid, position]) + if args: + conn.execute( + f""" + INSERT INTO collages_releases (collage_name, release_id, position) + VALUES {','.join(['(?, ?, ?)'] * len(cached_collage.release_ids))} + """, + args, + ) + + logger.info(f"Applying cache updates for collage {cached_collage.name}") + + if nonexistent_release_idxs: + new_diskdata_releases: list[dict[str, str]] = [] + removed_releases: list[str] = [] + for idx, rls in enumerate(diskdata.get("releases", [])): + if idx in nonexistent_release_idxs: + removed_releases.append(rls["description_meta"]) + continue + new_diskdata_releases.append(rls) + + with source_path.open("wb") as fp: + tomli_w.dump({"releases": new_diskdata_releases}, fp) + + logger.info( + f"Removing nonexistent releases from collage {cached_collage.name}: " + f"{','.join(removed_releases)}" + ) + + logger.debug(f"Collage update loop time {time.time() - loop_start=}") def list_releases( @@ -800,21 +938,21 @@ def list_releases( query = r""" WITH genres AS ( SELECT - release_id, - GROUP_CONCAT(genre, ' \\ ') AS genres + release_id + , GROUP_CONCAT(genre, ' \\ ') AS genres FROM releases_genres GROUP BY release_id ), labels AS ( SELECT - release_id, - GROUP_CONCAT(label, ' \\ ') AS labels + release_id + , GROUP_CONCAT(label, ' \\ ') AS labels FROM releases_labels GROUP BY release_id ), artists AS ( SELECT - release_id, - GROUP_CONCAT(artist, ' \\ ') AS names, - GROUP_CONCAT(role, ' \\ ') AS roles + release_id + , GROUP_CONCAT(artist, ' \\ ') AS names + , GROUP_CONCAT(role, ' \\ ') AS roles FROM releases_artists GROUP BY release_id ) @@ -904,9 +1042,9 @@ def get_release_files(c: Config, release_virtual_dirname: str) -> ReleaseFiles: r""" WITH artists AS ( SELECT - track_id, - GROUP_CONCAT(artist, ' \\ ') AS names, - GROUP_CONCAT(role, ' \\ ') AS roles + track_id + , GROUP_CONCAT(artist, ' \\ ') AS names + , GROUP_CONCAT(role, ' \\ ') AS roles FROM tracks_artists GROUP BY track_id ) diff --git a/rose/cache.sql b/rose/cache.sql index 53af653..048af20 100644 --- a/rose/cache.sql +++ b/rose/cache.sql @@ -100,36 +100,30 @@ CREATE INDEX tracks_artists_track_id ON tracks_artists(track_id); CREATE INDEX tracks_artists_artist ON tracks_artists(artist); CREATE INDEX tracks_artists_artist_sanitized ON tracks_artists(artist_sanitized); -CREATE TABLE collections ( - id TEXT PRIMARY KEY, - name TEXT NOT NULL, - source_path TEXT UNIQUE NOT NULL, +CREATE TABLE collages ( + name TEXT PRIMARY KEY, source_mtime TEXT NOT NULL ); -CREATE INDEX collections_source_path ON collections(source_path); -CREATE TABLE collections_releases ( - collection_id TEXT REFERENCES collections(id) ON DELETE CASCADE, +CREATE TABLE collages_releases ( + collage_name TEXT REFERENCES collages(name) ON DELETE CASCADE, release_id TEXT REFERENCES releases(id) ON DELETE CASCADE, position INTEGER NOT NULL ); -CREATE INDEX collections_releases_collection_id ON collections_releases(collection_id); -CREATE INDEX collections_releases_release_id ON collections_releases(release_id); -CREATE UNIQUE INDEX collections_releases_collection_position ON collections_releases(collection_id, position); +CREATE INDEX collages_releases_collage_name ON collages_releases(collage_name); +CREATE INDEX collages_releases_release_id ON collages_releases(release_id); +CREATE UNIQUE INDEX collages_releases_collage_position ON collages_releases(collage_name, position); CREATE TABLE playlists ( - id TEXT PRIMARY KEY, - name TEXT NOT NULL, - source_path TEXT UNIQUE NOT NULL, + name TEXT PRIMARY KEY, source_mtime TEXT NOT NULL ); -CREATE INDEX playlists_source_path ON playlists(source_path); CREATE TABLE playlists_tracks ( - playlist_id TEXT REFERENCES playlists(id) ON DELETE CASCADE, + playlist_name TEXT REFERENCES playlists(name) ON DELETE CASCADE, track_id TEXT REFERENCES tracks(id) ON DELETE CASCADE, position INTEGER NOT NULL ); -CREATE INDEX playlists_tracks_playlist_id ON playlists_tracks(playlist_id); +CREATE INDEX playlists_tracks_playlist_name ON playlists_tracks(playlist_name); CREATE INDEX playlists_tracks_track_id ON playlists_tracks(track_id); -CREATE UNIQUE INDEX playlists_tracks_playlist_position ON playlists_tracks(playlist_id, position); +CREATE UNIQUE INDEX playlists_tracks_playlist_position ON playlists_tracks(playlist_name, position); diff --git a/rose/cache_test.py b/rose/cache_test.py index 13d6316..68b7165 100644 --- a/rose/cache_test.py +++ b/rose/cache_test.py @@ -3,6 +3,7 @@ from pathlib import Path import pytest +import tomllib from rose.cache import ( CACHE_SCHEMA_PATH, @@ -23,8 +24,8 @@ migrate_database, release_exists, track_exists, + update_cache, update_cache_delete_nonexistent_releases, - update_cache_for_all_releases, update_cache_for_releases, ) from rose.config import Config @@ -60,9 +61,33 @@ def test_migration(config: Config) -> None: TESTDATA = Path(__file__).resolve().parent.parent / "testdata" / "cache" TEST_RELEASE_1 = TESTDATA / "Test Release 1" TEST_RELEASE_2 = TESTDATA / "Test Release 2" +TEST_COLLAGE_1 = TESTDATA / "Collage 1" -def test_update_cache_for_release(config: Config) -> None: +def test_update_cache_all(config: Config) -> None: + """Test that the update all function works.""" + shutil.copytree(TEST_RELEASE_1, config.music_source_dir / TEST_RELEASE_1.name) + shutil.copytree(TEST_RELEASE_2, config.music_source_dir / TEST_RELEASE_2.name) + + # Test that we prune deleted releases too. + with connect(config) as conn: + conn.execute( + """ + INSERT INTO releases (id, source_path, virtual_dirname, datafile_mtime, title, release_type, multidisc, formatted_artists) + VALUES ('aaaaaa', '/nonexistent', '999', 'nonexistent', 'aa', 'unknown', false, 'aa;aa') + """ # noqa: E501 + ) + + update_cache(config) + + with connect(config) as conn: + cursor = conn.execute("SELECT COUNT(*) FROM releases") + assert cursor.fetchone()[0] == 2 + cursor = conn.execute("SELECT COUNT(*) FROM tracks") + assert cursor.fetchone()[0] == 4 + + +def test_update_cache_releases(config: Config) -> None: release_dir = config.music_source_dir / TEST_RELEASE_1.name shutil.copytree(TEST_RELEASE_1, release_dir) update_cache_for_releases(config, [release_dir]) @@ -156,7 +181,7 @@ def test_update_cache_for_release(config: Config) -> None: } -def test_update_cache_uncached_release_with_existing_id(config: Config) -> None: +def test_update_cache_releases_uncached_with_existing_id(config: Config) -> None: """Test that IDs in filenames are read and preserved.""" release_dir = config.music_source_dir / TEST_RELEASE_2.name shutil.copytree(TEST_RELEASE_2, release_dir) @@ -170,7 +195,7 @@ def test_update_cache_uncached_release_with_existing_id(config: Config) -> None: assert release_id == "ilovecarly" # Hardcoded ID for testing. -def test_update_cache_already_fully_cached_release(config: Config) -> None: +def test_update_cache_releases_already_fully_cached(config: Config) -> None: """Test that a fully cached release No Ops when updated again.""" release_dir = config.music_source_dir / TEST_RELEASE_1.name shutil.copytree(TEST_RELEASE_1, release_dir) @@ -190,7 +215,7 @@ def test_update_cache_already_fully_cached_release(config: Config) -> None: assert row["new"] -def test_update_cache_disk_update_to_cached_release(config: Config) -> None: +def test_update_cache_releases_disk_update_to_previously_cached(config: Config) -> None: """Test that a cached release is updated after a track updates.""" release_dir = config.music_source_dir / TEST_RELEASE_1.name shutil.copytree(TEST_RELEASE_1, release_dir) @@ -215,7 +240,7 @@ def test_update_cache_disk_update_to_cached_release(config: Config) -> None: assert row["new"] -def test_update_cache_disk_update_to_datafile(config: Config) -> None: +def test_update_cache_releases_disk_update_to_datafile(config: Config) -> None: """Test that a cached release is updated after a datafile updates.""" release_dir = config.music_source_dir / TEST_RELEASE_1.name shutil.copytree(TEST_RELEASE_1, release_dir) @@ -231,7 +256,7 @@ def test_update_cache_disk_update_to_datafile(config: Config) -> None: assert row["new"] -def test_update_cache_disk_upgrade_old_datafile(config: Config) -> None: +def test_update_cache_releases_disk_upgrade_old_datafile(config: Config) -> None: """Test that a legacy invalid datafile is upgraded on index.""" release_dir = config.music_source_dir / TEST_RELEASE_1.name shutil.copytree(TEST_RELEASE_1, release_dir) @@ -249,7 +274,7 @@ def test_update_cache_disk_upgrade_old_datafile(config: Config) -> None: assert "new = true" in fp.read() -def test_update_cache_disk_directory_renamed(config: Config) -> None: +def test_update_cache_releases_source_path_renamed(config: Config) -> None: """Test that a cached release is updated after a directory rename.""" release_dir = config.music_source_dir / TEST_RELEASE_1.name shutil.copytree(TEST_RELEASE_1, release_dir) @@ -271,7 +296,7 @@ def test_update_cache_disk_directory_renamed(config: Config) -> None: assert row["new"] -def test_update_cache_delete_nonexistent_releases(config: Config) -> None: +def test_update_cache_releases_delete_nonexistent(config: Config) -> None: """Test that deleted releases that are no longer on disk are cleared from cache.""" with connect(config) as conn: conn.execute( @@ -286,30 +311,7 @@ def test_update_cache_delete_nonexistent_releases(config: Config) -> None: assert cursor.fetchone()[0] == 0 -def test_update_cache_for_all_releases(config: Config) -> None: - """Test that the update all function works.""" - shutil.copytree(TEST_RELEASE_1, config.music_source_dir / TEST_RELEASE_1.name) - shutil.copytree(TEST_RELEASE_2, config.music_source_dir / TEST_RELEASE_2.name) - - # Test that we prune deleted releases too. - with connect(config) as conn: - conn.execute( - """ - INSERT INTO releases (id, source_path, virtual_dirname, datafile_mtime, title, release_type, multidisc, formatted_artists) - VALUES ('aaaaaa', '/nonexistent', '999', 'nonexistent', 'aa', 'unknown', false, 'aa;aa') - """ # noqa: E501 - ) - - update_cache_for_all_releases(config) - - with connect(config) as conn: - cursor = conn.execute("SELECT COUNT(*) FROM releases") - assert cursor.fetchone()[0] == 2 - cursor = conn.execute("SELECT COUNT(*) FROM tracks") - assert cursor.fetchone()[0] == 4 - - -def test_update_cache_skips_empty_directory(config: Config) -> None: +def test_update_cache_releases_skips_empty_directory(config: Config) -> None: """Test that an directory with no audio files is skipped.""" rd = config.music_source_dir / "lalala" rd.mkdir() @@ -320,7 +322,7 @@ def test_update_cache_skips_empty_directory(config: Config) -> None: assert cursor.fetchone()[0] == 0 -def test_update_cache_uncaches_empty_directory(config: Config) -> None: +def test_update_cache_releases_uncaches_empty_directory(config: Config) -> None: """Test that a previously-cached directory with no audio files now is cleared from cache.""" release_dir = config.music_source_dir / TEST_RELEASE_1.name shutil.copytree(TEST_RELEASE_1, release_dir) @@ -333,6 +335,48 @@ def test_update_cache_uncaches_empty_directory(config: Config) -> None: assert cursor.fetchone()[0] == 0 +def test_update_cache_collages(config: Config) -> None: + shutil.copytree(TEST_RELEASE_2, config.music_source_dir / TEST_RELEASE_2.name) + shutil.copytree(TEST_COLLAGE_1, config.music_source_dir / "!collages") + update_cache(config) + + # Assert that the collage metadata was read correctly. + with connect(config) as conn: + cursor = conn.execute("SELECT name, source_mtime FROM collages") + rows = cursor.fetchall() + assert len(rows) == 1 + row = rows[0] + assert row["name"] == "Rose Gold" + assert row["source_mtime"] + + cursor = conn.execute("SELECT collage_name, release_id, position FROM collages_releases") + rows = cursor.fetchall() + assert len(rows) == 1 + row = rows[0] + assert row["collage_name"] == "Rose Gold" + assert row["release_id"] == "ilovecarly" + assert row["position"] == 0 + + +def test_update_cache_collages_nonexistent_release_id(config: Config) -> None: + shutil.copytree(TEST_COLLAGE_1, config.music_source_dir / "!collages") + update_cache(config) + + # Assert that a nonexistent release was not read. + with connect(config) as conn: + cursor = conn.execute("SELECT name FROM collages") + assert cursor.fetchone()["name"] == "Rose Gold" + + cursor = conn.execute("SELECT collage_name, release_id, position FROM collages_releases") + rows = cursor.fetchall() + assert not rows + + # Assert that source file was updated to remove the release. + with (config.music_source_dir / "!collages" / "Rose Gold.toml").open("rb") as fp: + data = tomllib.load(fp) + assert data["releases"] == [] + + @pytest.mark.usefixtures("seeded_cache") def test_list_releases(config: Config) -> None: albums = list(list_releases(config)) diff --git a/testdata/cache/Collage 1/Rose Gold.toml b/testdata/cache/Collage 1/Rose Gold.toml new file mode 100644 index 0000000..8a7d187 --- /dev/null +++ b/testdata/cache/Collage 1/Rose Gold.toml @@ -0,0 +1,3 @@ +[[releases]] +uuid = "ilovecarly" +description_meta = "lalala"