From c9fd17bc2b37b5b3054b49a7d55d2cac514d0b33 Mon Sep 17 00:00:00 2001 From: Fabrizio Ferrai Date: Mon, 30 Oct 2023 15:17:24 +0200 Subject: [PATCH] Use the database to cache the index and metadata (#1107) --- bin/src/Main.purs | 124 +++++++++++++++++----------------------- src/Spago/Db.js | 84 ++++++++++++++++++--------- src/Spago/Db.purs | 97 +++++++++++++++++-------------- src/Spago/Registry.purs | 28 +++++++++ 4 files changed, 191 insertions(+), 142 deletions(-) diff --git a/bin/src/Main.purs b/bin/src/Main.purs index c467a8e9b..902e88bfd 100644 --- a/bin/src/Main.purs +++ b/bin/src/Main.purs @@ -8,15 +8,12 @@ import Data.Array.NonEmpty as NEA import Data.Array.NonEmpty as NonEmptyArray import Data.Codec.Argonaut.Common as CA.Common import Data.Foldable as Foldable -import Data.JSDate as JSDate import Data.List as List import Data.Map as Map import Data.Maybe as Maybe import Data.String as String import Effect.Aff as Aff import Effect.Now as Now -import Effect.Ref as Ref -import Node.FS.Stats (Stats(..)) import Node.Path as Path import Node.Process as Process import Options.Applicative (CommandFields, Mod, Parser, ParserPrefs(..)) @@ -27,6 +24,7 @@ import Registry.Constants as Registry.Constants import Registry.ManifestIndex as ManifestIndex import Registry.Metadata as Metadata import Registry.PackageName as PackageName +import Registry.Version as Version import Spago.Bin.Flags as Flags import Spago.Command.Build as Build import Spago.Command.Bundle as Bundle @@ -912,18 +910,44 @@ mkRegistryEnv offline = do -- Make sure we have git and purs git <- Git.getGit purs <- Purs.getPurs + { logOptions } <- ask + + -- Connect to the database - we need it to keep track of when to pull the Registry, + -- so we don't do it too often + db <- liftEffect $ Db.connect + { database: Paths.databasePath + , logger: \str -> Reader.runReaderT (logDebug $ "DB: " <> str) { logOptions } + } + + -- we keep track of how old the latest pull was - if the last pull was recent enough + -- we just move on, otherwise run the fibers + fetchingFreshRegistry <- Registry.shouldFetchRegistryRepos db + when fetchingFreshRegistry do + -- clone the registry and index repo, or update them + logInfo "Refreshing the Registry Index..." + runSpago { logOptions, git, offline } $ parallelise + [ Git.fetchRepo { git: "https://github.com/purescript/registry-index.git", ref: "main" } Paths.registryIndexPath >>= case _ of + Right _ -> pure unit + Left _err -> logWarn "Couldn't refresh the registry-index, will proceed anyways" + , Git.fetchRepo { git: "https://github.com/purescript/registry.git", ref: "main" } Paths.registryPath >>= case _ of + Right _ -> pure unit + Left _err -> logWarn "Couldn't refresh the registry, will proceed anyways" + ] + + -- Now that we are up to date with the Registry we init/refresh the database + Registry.updatePackageSetsDb db - -- we make a Ref for the Index so that we can memoize the lookup of packages - -- and we don't have to read it all together - indexRef <- liftEffect $ Ref.new (Map.empty :: Map PackageName (Map Version Manifest)) + -- Prepare the functions to read the manifests and metadata - here we memoize as much + -- as we can in the DB, so we don't have to read the files every time let + -- Manifests are immutable so we can just lookup in the DB or read from file if not there getManifestFromIndex :: PackageName -> Version -> Spago (LogEnv ()) (Maybe Manifest) getManifestFromIndex name version = do - indexMap <- liftEffect (Ref.read indexRef) - case Map.lookup name indexMap of - Just meta -> pure (Map.lookup version meta) + liftEffect (Db.getManifest db name version) >>= case _ of + Just manifest -> pure (Just manifest) Nothing -> do - -- if we don't have it we try reading it from file + -- if we don't have it we need to read it from file + -- (note that we have all the versions of a package in the same file) logDebug $ "Reading package from Index: " <> PackageName.print name maybeManifests <- liftAff $ ManifestIndex.readEntryFile Paths.registryIndexPath name manifests <- map (map (\m@(Manifest m') -> Tuple m'.version m)) case maybeManifests of @@ -932,50 +956,36 @@ mkRegistryEnv offline = do logWarn $ "Could not read package manifests from index, proceeding anyways. Error: " <> err pure [] let versions = Map.fromFoldable manifests - liftEffect (Ref.write (Map.insert name versions indexMap) indexRef) + -- and memoize it + for_ manifests \(Tuple _ manifest@(Manifest m)) -> do + logDebug $ "Inserting manifest in DB: " <> PackageName.print name <> " v" <> Version.print m.version + liftEffect $ Db.insertManifest db name m.version manifest pure (Map.lookup version versions) - -- same deal for the metadata files - metadataRef <- liftEffect $ Ref.new (Map.empty :: Map PackageName Metadata) + -- Metadata can change over time (unpublished packages, and new packages), so we need + -- to read it from file every time we have a fresh Registry let + metadataFromFile name = do + let metadataFilePath = Path.concat [ Paths.registryPath, Registry.Constants.metadataDirectory, PackageName.print name <> ".json" ] + logDebug $ "Reading metadata from file: " <> metadataFilePath + liftAff (FS.readJsonFile Metadata.codec metadataFilePath) + getMetadata :: PackageName -> Spago (LogEnv ()) (Either String Metadata) getMetadata name = do - metadataMap <- liftEffect (Ref.read metadataRef) - case Map.lookup name metadataMap of - Just meta -> pure (Right meta) - Nothing -> do + -- we first try reading it from the DB + liftEffect (Db.getMetadata db name) >>= case _ of + Just metadata | not fetchingFreshRegistry -> do + logDebug $ "Got metadata from DB: " <> PackageName.print name + pure (Right metadata) + _ -> do -- if we don't have it we try reading it from file - let metadataFilePath = Path.concat [ Paths.registryPath, Registry.Constants.metadataDirectory, PackageName.print name <> ".json" ] - logDebug $ "Reading metadata from file: " <> metadataFilePath - liftAff (FS.readJsonFile Metadata.codec metadataFilePath) >>= case _ of + metadataFromFile name >>= case _ of Left e -> pure (Left e) Right m -> do -- and memoize it - liftEffect (Ref.write (Map.insert name m metadataMap) metadataRef) + liftEffect (Db.insertMetadata db name m) pure (Right m) - { logOptions } <- ask - -- we keep track of how old the latest pull was - if the last pull was recent enough - -- we just move on, otherwise run the fibers - whenM shouldFetchRegistryRepos do - -- clone the registry and index repo, or update them - logInfo "Refreshing the Registry Index..." - runSpago { logOptions, git, offline } $ parallelise - [ Git.fetchRepo { git: "https://github.com/purescript/registry-index.git", ref: "main" } Paths.registryIndexPath >>= case _ of - Right _ -> pure unit - Left _err -> logWarn "Couldn't refresh the registry-index, will proceed anyways" - , Git.fetchRepo { git: "https://github.com/purescript/registry.git", ref: "main" } Paths.registryPath >>= case _ of - Right _ -> pure unit - Left _err -> logWarn "Couldn't refresh the registry, will proceed anyways" - ] - - -- Now that we are up to date with the Registry we init/refresh the database - db <- liftEffect $ Db.connect - { database: Paths.databasePath - , logger: \str -> Reader.runReaderT (logDebug $ "DB: " <> str) { logOptions } - } - Registry.updatePackageSetsDb db - pure { getManifestFromIndex , getMetadata @@ -1020,32 +1030,4 @@ mkDocsEnv args dependencies = do , open: args.open } -shouldFetchRegistryRepos :: forall a. Spago (LogEnv a) Boolean -shouldFetchRegistryRepos = do - let freshRegistryCanary = Path.concat [ Paths.globalCachePath, "fresh-registry-canary.txt" ] - FS.stat freshRegistryCanary >>= case _ of - Left err -> do - -- If the stat fails the file probably does not exist - logDebug [ "Could not stat " <> freshRegistryCanary, show err ] - -- in which case we touch it and fetch - touch freshRegistryCanary - pure true - Right (Stats { mtime }) -> do - -- it does exist here, see if it's old enough, and fetch if it is - now <- liftEffect $ JSDate.now - let minutes = 15.0 - let staleAfter = 1000.0 * 60.0 * minutes -- need this in millis - let isOldEnough = (JSDate.getTime now) > (JSDate.getTime mtime + staleAfter) - if isOldEnough then do - logDebug "Registry index is old, refreshing canary" - touch freshRegistryCanary - pure true - else do - logDebug "Registry index is fresh enough, moving on..." - pure false - where - touch path = do - FS.ensureFileSync path - FS.writeTextFile path "" - foreign import supportsColor :: Effect Boolean diff --git a/src/Spago/Db.js b/src/Spago/Db.js index 3b71e22ae..e55d817c6 100644 --- a/src/Spago/Db.js +++ b/src/Spago/Db.js @@ -4,7 +4,7 @@ export const connectImpl = (path, logger) => { logger("Connecting to database at " + path); let db = new Database(path, { fileMustExist: false, - verbose: logger, + // verbose: logger, }); db.pragma("journal_mode = WAL"); db.pragma("foreign_keys = ON"); @@ -19,16 +19,24 @@ export const connectImpl = (path, logger) => { , packageName TEXT NOT NULL , packageVersion TEXT NOT NULL , PRIMARY KEY (packageSetVersion, packageName, packageVersion) - , FOREIGN KEY (packageSetVersion) REFERENCES package_sets(version))`).run(); - // TODO: this is here as a placeholder, but not settled yet - // db.prepare(`CREATE TABLE IF NOT EXISTS package_versions - // ( name TEXT NOT NULL - // , version TEXT NOT NULL - // , published INTEGER NOT NULL - // , date TEXT NOT NULL - // , manifest TEXT NOT NULL - // , location TEXT NOT NULL - // , PRIMARY KEY (name, version))`).run(); + , FOREIGN KEY (packageSetVersion) REFERENCES package_sets(version) + )`).run(); + db.prepare(`CREATE TABLE IF NOT EXISTS last_git_pull + ( key TEXT PRIMARY KEY NOT NULL + , date TEXT NOT NULL + )`).run(); + db.prepare(`CREATE TABLE IF NOT EXISTS package_metadata + ( name TEXT PRIMARY KEY NOT NULL + , metadata TEXT NOT NULL + )`).run(); + // it would be lovely if we'd have a foreign key on package_metadata, but that would + // require reading metadatas before manifests, which we can't always guarantee + db.prepare(`CREATE TABLE IF NOT EXISTS package_manifests + ( name TEXT NOT NULL + , version TEXT NOT NULL + , manifest TEXT NOT NULL + , PRIMARY KEY (name, version) + )`).run(); return db; }; @@ -38,12 +46,6 @@ export const insertPackageSetImpl = (db, packageSet) => { ).run(packageSet); }; -export const insertPackageVersionImpl = (db, packageVersion) => { - db.prepare( - "INSERT INTO package_versions (name, version, published, date, manifest, location) VALUES (@name, @version, @published, @date, @manifest, @location)" - ).run(packageVersion); -} - export const insertPackageSetEntryImpl = (db, packageSetEntry) => { db.prepare( "INSERT INTO package_set_entries (packageSetVersion, packageName, packageVersion) VALUES (@packageSetVersion, @packageName, @packageVersion)" @@ -64,17 +66,6 @@ export const selectPackageSetsImpl = (db) => { return row; } -export const selectPackageVersionImpl = (db, name, version) => { - const row = db - .prepare("SELECT * FROM package_versions WHERE name = ? AND version = ? LIMIT 1") - .get(name, version); - return row; -} - -export const unpublishPackageVersionImpl = (db, name, version) => { - db.prepare("UPDATE package_versions SET published = 0 WHERE name = ? AND version = ?").run(name, version); -} - export const selectPackageSetEntriesBySetImpl = (db, packageSetVersion) => { const row = db .prepare("SELECT * FROM package_set_entries WHERE packageSetVersion = ?") @@ -88,3 +79,40 @@ export const selectPackageSetEntriesByPackageImpl = (db, packageName, packageVer .all(packageName, packageVersion); return row; } + +export const getLastPullImpl = (db, key) => { + const row = db + .prepare("SELECT * FROM last_git_pull WHERE key = ? LIMIT 1") + .get(key); + return row?.date; +} + +export const updateLastPullImpl = (db, key, date) => { + db.prepare("INSERT OR REPLACE INTO last_git_pull (key, date) VALUES (@key, @date)").run({ key, date }); +} + +export const getManifestImpl = (db, name, version) => { + const row = db + .prepare("SELECT * FROM package_manifests WHERE name = ? AND version = ? LIMIT 1") + .get(name, version); + return row?.manifest; +} + +export const insertManifestImpl = (db, name, version, manifest) => { + db.prepare("INSERT OR IGNORE INTO package_manifests (name, version, manifest) VALUES (@name, @version, @manifest)").run({ name, version, manifest }); +} + +export const removeManifestImpl = (db, name, version) => { + db.prepare("DELETE FROM package_manifests WHERE name = ? AND version = ?").run(name, version); +} + +export const getMetadataImpl = (db, name) => { + const row = db + .prepare("SELECT * FROM package_metadata WHERE name = ? LIMIT 1") + .get(name); + return row?.metadata; +} + +export const insertMetadataImpl = (db, name, metadata) => { + db.prepare("INSERT OR REPLACE INTO package_metadata (name, metadata) VALUES (@name, @metadata)").run({ name, metadata }); +} diff --git a/src/Spago/Db.purs b/src/Spago/Db.purs index 2c69e90e3..ba6bfaaab 100644 --- a/src/Spago/Db.purs +++ b/src/Spago/Db.purs @@ -5,33 +5,38 @@ module Spago.Db , PackageSetEntry , PackageVersion , connect - , selectPackageSets - , selectLatestPackageSetByCompiler + , getLastPull + , getManifest + , getMetadata + , insertManifest + , insertMetadata , insertPackageSet , insertPackageSetEntry , packageSetCodec + , selectLatestPackageSetByCompiler + , selectPackageSets + , updateLastPull ) where import Spago.Prelude import Data.Array as Array -import Data.Codec.Argonaut as Json import Data.Codec.Argonaut.Record as CA.Record import Data.DateTime (Date, DateTime(..)) import Data.DateTime as Date +import Data.Either as Either import Data.Formatter.DateTime as DateTime +import Data.Map as Map import Data.Nullable (Nullable) import Data.Nullable as Nullable -import Effect.Uncurried (EffectFn1, EffectFn2, EffectFn3) +import Effect.Uncurried (EffectFn1, EffectFn2, EffectFn3, EffectFn4) import Effect.Uncurried as Uncurried -import Node.Path as Path import Registry.Internal.Codec as Internal.Codec import Registry.Internal.Format as Internal.Format -import Registry.Location as Location import Registry.Manifest as Manifest +import Registry.Metadata as Metadata import Registry.PackageName as PackageName import Registry.Version as Version -import Spago.Paths as Paths -------------------------------------------------------------------------------- -- API @@ -47,9 +52,6 @@ connect { database, logger } = Uncurried.runEffectFn2 connectImpl database (Uncu insertPackageSet :: Db -> PackageSet -> Effect Unit insertPackageSet db = Uncurried.runEffectFn2 insertPackageSetImpl db <<< packageSetToJs -insertPackageVersion :: Db -> PackageVersion -> Effect Unit -insertPackageVersion db = Uncurried.runEffectFn2 insertPackageVersionImpl db <<< packageVersionToJs - insertPackageSetEntry :: Db -> PackageSetEntry -> Effect Unit insertPackageSetEntry db = Uncurried.runEffectFn2 insertPackageSetEntryImpl db <<< packageSetEntryToJs @@ -63,14 +65,6 @@ selectLatestPackageSetByCompiler db compiler = do maybePackageSet <- Nullable.toMaybe <$> Uncurried.runEffectFn2 selectLatestPackageSetByCompilerImpl db (Version.print compiler) pure $ packageSetFromJs =<< maybePackageSet -selectPackageVersion :: Db -> PackageName -> Version -> Effect (Maybe PackageVersion) -selectPackageVersion db packageName version = do - maybePackageVersion <- Nullable.toMaybe <$> Uncurried.runEffectFn3 selectPackageVersionImpl db (PackageName.print packageName) (Version.print version) - pure $ packageVersionFromJs =<< maybePackageVersion - -unpublishPackageVersion :: Db -> PackageName -> Version -> Effect Unit -unpublishPackageVersion db packageName version = Uncurried.runEffectFn3 unpublishPackageVersionImpl db (PackageName.print packageName) (Version.print version) - selectPackageSetEntriesBySet :: Db -> Version -> Effect (Array PackageSetEntry) selectPackageSetEntriesBySet db packageSetVersion = do packageSetEntries <- Uncurried.runEffectFn2 selectPackageSetEntriesBySetImpl db (Version.print packageSetVersion) @@ -81,6 +75,34 @@ selectPackageSetEntriesByPackage db packageName version = do packageSetEntries <- Uncurried.runEffectFn3 selectPackageSetEntriesByPackageImpl db (PackageName.print packageName) (Version.print version) pure $ Array.mapMaybe packageSetEntryFromJs packageSetEntries +getLastPull :: Db -> String -> Effect (Maybe DateTime) +getLastPull db key = do + maybePull <- Nullable.toMaybe <$> Uncurried.runEffectFn2 getLastPullImpl db key + pure $ (Either.hush <<< DateTime.unformat Internal.Format.iso8601DateTime) =<< maybePull + +updateLastPull :: Db -> String -> DateTime -> Effect Unit +updateLastPull db key date = Uncurried.runEffectFn3 updateLastPullImpl db key (DateTime.format Internal.Format.iso8601DateTime date) + +getManifest :: Db -> PackageName -> Version -> Effect (Maybe Manifest) +getManifest db packageName version = do + maybeManifest <- Nullable.toMaybe <$> Uncurried.runEffectFn3 getManifestImpl db (PackageName.print packageName) (Version.print version) + pure $ (Either.hush <<< parseJson Manifest.codec) =<< maybeManifest + +insertManifest :: Db -> PackageName -> Version -> Manifest -> Effect Unit +insertManifest db packageName version manifest = Uncurried.runEffectFn4 insertManifestImpl db (PackageName.print packageName) (Version.print version) (printJson Manifest.codec manifest) + +getMetadata :: Db -> PackageName -> Effect (Maybe Metadata) +getMetadata db packageName = do + maybeMetadata <- Nullable.toMaybe <$> Uncurried.runEffectFn2 getMetadataImpl db (PackageName.print packageName) + pure $ (Either.hush <<< parseJson Metadata.codec) =<< maybeMetadata + +insertMetadata :: Db -> PackageName -> Metadata -> Effect Unit +insertMetadata db packageName metadata@(Metadata { unpublished }) = do + Uncurried.runEffectFn3 insertMetadataImpl db (PackageName.print packageName) (printJson Metadata.codec metadata) + -- we also do a pass of removing the cached manifests that have been unpublished + for_ (Map.toUnfoldable unpublished :: Array _) \(Tuple version _) -> do + Uncurried.runEffectFn3 removeManifestImpl db (PackageName.print packageName) (Version.print version) + -------------------------------------------------------------------------------- -- Table types and conversions @@ -143,25 +165,6 @@ packageSetFromJs p = hush do date <- map Date.date $ DateTime.unformat Internal.Format.iso8601Date p.date pure $ { version, compiler, date } -packageVersionToJs :: PackageVersion -> PackageVersionJs -packageVersionToJs { name, version, published, date, manifest, location } = - { name: PackageName.print name - , version: Version.print version - , published: if published then 1 else 0 - , date: DateTime.format Internal.Format.iso8601DateTime date - , manifest: printJson Manifest.codec manifest - , location: printJson Location.codec location - } - -packageVersionFromJs :: PackageVersionJs -> Maybe PackageVersion -packageVersionFromJs p = hush do - name <- PackageName.parse p.name - version <- Version.parse p.version - date <- DateTime.unformat Internal.Format.iso8601DateTime p.date - manifest <- lmap Json.printJsonDecodeError $ parseJson Manifest.codec p.manifest - location <- lmap Json.printJsonDecodeError $ parseJson Location.codec p.location - pure $ { name, version, published: p.published == 1, date, manifest, location } - packageSetEntryToJs :: PackageSetEntry -> PackageSetEntryJs packageSetEntryToJs { packageSetVersion, packageName, packageVersion } = { packageSetVersion: Version.print packageSetVersion @@ -193,18 +196,26 @@ foreign import connectImpl :: EffectFn2 FilePath (EffectFn1 String Unit) Db foreign import insertPackageSetImpl :: EffectFn2 Db PackageSetJs Unit -foreign import insertPackageVersionImpl :: EffectFn2 Db PackageVersionJs Unit - foreign import insertPackageSetEntryImpl :: EffectFn2 Db PackageSetEntryJs Unit foreign import selectLatestPackageSetByCompilerImpl :: EffectFn2 Db String (Nullable PackageSetJs) foreign import selectPackageSetsImpl :: EffectFn1 Db (Array PackageSetJs) -foreign import selectPackageVersionImpl :: EffectFn3 Db String String (Nullable PackageVersionJs) - -foreign import unpublishPackageVersionImpl :: EffectFn3 Db String String Unit - foreign import selectPackageSetEntriesBySetImpl :: EffectFn2 Db String (Array PackageSetEntryJs) foreign import selectPackageSetEntriesByPackageImpl :: EffectFn3 Db String String (Array PackageSetEntryJs) + +foreign import getLastPullImpl :: EffectFn2 Db String (Nullable String) + +foreign import updateLastPullImpl :: EffectFn3 Db String String Unit + +foreign import getManifestImpl :: EffectFn3 Db String String (Nullable String) + +foreign import insertManifestImpl :: EffectFn4 Db String String String Unit + +foreign import removeManifestImpl :: EffectFn3 Db String String Unit + +foreign import getMetadataImpl :: EffectFn2 Db String (Nullable String) + +foreign import insertMetadataImpl :: EffectFn3 Db String String Unit diff --git a/src/Spago/Registry.purs b/src/Spago/Registry.purs index db95e1c96..d723baf19 100644 --- a/src/Spago/Registry.purs +++ b/src/Spago/Registry.purs @@ -3,10 +3,13 @@ module Spago.Registry where import Spago.Prelude import Data.Array as Array +import Data.DateTime as DateTime import Data.Map as Map import Data.Set as Set import Data.String (Pattern(..)) import Data.String as String +import Data.Time.Duration (Minutes(..)) +import Effect.Now as Now import Node.Path as Path import Registry.PackageSet (PackageSet(..)) import Registry.PackageSet as PackageSet @@ -118,3 +121,28 @@ isVersionCompatible installedVersion minVersion = [ 0, b, c ], [ 0, y, z ] | b == y && c >= z -> true [ a, b, _c ], [ x, y, _z ] | a /= 0 && a == x && b >= y -> true _, _ -> false + +-- | Check if we have fetched the registry recently enough, so we don't hit the net all the time +shouldFetchRegistryRepos :: forall a. Db -> Spago (LogEnv a) Boolean +shouldFetchRegistryRepos db = do + now <- liftEffect $ Now.nowDateTime + let registryKey = "registry" + maybeLastRegistryFetch <- liftEffect $ Db.getLastPull db registryKey + case maybeLastRegistryFetch of + -- No record, so we have to fetch + Nothing -> do + logDebug "No record of last registry pull, will fetch" + liftEffect $ Db.updateLastPull db registryKey now + pure true + -- We have a record, so we check if it's old enough + Just lastRegistryFetch -> do + let staleAfter = Minutes 15.0 + let (timeDiff :: Minutes) = DateTime.diff now lastRegistryFetch + let isOldEnough = timeDiff > staleAfter + if isOldEnough then do + logDebug "Registry is old, refreshing" + liftEffect $ Db.updateLastPull db registryKey now + pure true + else do + logDebug "Registry is fresh enough, moving on..." + pure false