Skip to content

Commit

Permalink
Use the database to cache the index and metadata (#1107)
Browse files Browse the repository at this point in the history
  • Loading branch information
f-f authored Oct 30, 2023
1 parent 796d80e commit c9fd17b
Show file tree
Hide file tree
Showing 4 changed files with 191 additions and 142 deletions.
124 changes: 53 additions & 71 deletions bin/src/Main.purs
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,12 @@ import Data.Array.NonEmpty as NEA
import Data.Array.NonEmpty as NonEmptyArray
import Data.Codec.Argonaut.Common as CA.Common
import Data.Foldable as Foldable
import Data.JSDate as JSDate
import Data.List as List
import Data.Map as Map
import Data.Maybe as Maybe
import Data.String as String
import Effect.Aff as Aff
import Effect.Now as Now
import Effect.Ref as Ref
import Node.FS.Stats (Stats(..))
import Node.Path as Path
import Node.Process as Process
import Options.Applicative (CommandFields, Mod, Parser, ParserPrefs(..))
Expand All @@ -27,6 +24,7 @@ import Registry.Constants as Registry.Constants
import Registry.ManifestIndex as ManifestIndex
import Registry.Metadata as Metadata
import Registry.PackageName as PackageName
import Registry.Version as Version
import Spago.Bin.Flags as Flags
import Spago.Command.Build as Build
import Spago.Command.Bundle as Bundle
Expand Down Expand Up @@ -912,18 +910,44 @@ mkRegistryEnv offline = do
-- Make sure we have git and purs
git <- Git.getGit
purs <- Purs.getPurs
{ logOptions } <- ask

-- Connect to the database - we need it to keep track of when to pull the Registry,
-- so we don't do it too often
db <- liftEffect $ Db.connect
{ database: Paths.databasePath
, logger: \str -> Reader.runReaderT (logDebug $ "DB: " <> str) { logOptions }
}

-- we keep track of how old the latest pull was - if the last pull was recent enough
-- we just move on, otherwise run the fibers
fetchingFreshRegistry <- Registry.shouldFetchRegistryRepos db
when fetchingFreshRegistry do
-- clone the registry and index repo, or update them
logInfo "Refreshing the Registry Index..."
runSpago { logOptions, git, offline } $ parallelise
[ Git.fetchRepo { git: "https://github.com/purescript/registry-index.git", ref: "main" } Paths.registryIndexPath >>= case _ of
Right _ -> pure unit
Left _err -> logWarn "Couldn't refresh the registry-index, will proceed anyways"
, Git.fetchRepo { git: "https://github.com/purescript/registry.git", ref: "main" } Paths.registryPath >>= case _ of
Right _ -> pure unit
Left _err -> logWarn "Couldn't refresh the registry, will proceed anyways"
]

-- Now that we are up to date with the Registry we init/refresh the database
Registry.updatePackageSetsDb db

-- we make a Ref for the Index so that we can memoize the lookup of packages
-- and we don't have to read it all together
indexRef <- liftEffect $ Ref.new (Map.empty :: Map PackageName (Map Version Manifest))
-- Prepare the functions to read the manifests and metadata - here we memoize as much
-- as we can in the DB, so we don't have to read the files every time
let
-- Manifests are immutable so we can just lookup in the DB or read from file if not there
getManifestFromIndex :: PackageName -> Version -> Spago (LogEnv ()) (Maybe Manifest)
getManifestFromIndex name version = do
indexMap <- liftEffect (Ref.read indexRef)
case Map.lookup name indexMap of
Just meta -> pure (Map.lookup version meta)
liftEffect (Db.getManifest db name version) >>= case _ of
Just manifest -> pure (Just manifest)
Nothing -> do
-- if we don't have it we try reading it from file
-- if we don't have it we need to read it from file
-- (note that we have all the versions of a package in the same file)
logDebug $ "Reading package from Index: " <> PackageName.print name
maybeManifests <- liftAff $ ManifestIndex.readEntryFile Paths.registryIndexPath name
manifests <- map (map (\m@(Manifest m') -> Tuple m'.version m)) case maybeManifests of
Expand All @@ -932,50 +956,36 @@ mkRegistryEnv offline = do
logWarn $ "Could not read package manifests from index, proceeding anyways. Error: " <> err
pure []
let versions = Map.fromFoldable manifests
liftEffect (Ref.write (Map.insert name versions indexMap) indexRef)
-- and memoize it
for_ manifests \(Tuple _ manifest@(Manifest m)) -> do
logDebug $ "Inserting manifest in DB: " <> PackageName.print name <> " v" <> Version.print m.version
liftEffect $ Db.insertManifest db name m.version manifest
pure (Map.lookup version versions)

-- same deal for the metadata files
metadataRef <- liftEffect $ Ref.new (Map.empty :: Map PackageName Metadata)
-- Metadata can change over time (unpublished packages, and new packages), so we need
-- to read it from file every time we have a fresh Registry
let
metadataFromFile name = do
let metadataFilePath = Path.concat [ Paths.registryPath, Registry.Constants.metadataDirectory, PackageName.print name <> ".json" ]
logDebug $ "Reading metadata from file: " <> metadataFilePath
liftAff (FS.readJsonFile Metadata.codec metadataFilePath)

getMetadata :: PackageName -> Spago (LogEnv ()) (Either String Metadata)
getMetadata name = do
metadataMap <- liftEffect (Ref.read metadataRef)
case Map.lookup name metadataMap of
Just meta -> pure (Right meta)
Nothing -> do
-- we first try reading it from the DB
liftEffect (Db.getMetadata db name) >>= case _ of
Just metadata | not fetchingFreshRegistry -> do
logDebug $ "Got metadata from DB: " <> PackageName.print name
pure (Right metadata)
_ -> do
-- if we don't have it we try reading it from file
let metadataFilePath = Path.concat [ Paths.registryPath, Registry.Constants.metadataDirectory, PackageName.print name <> ".json" ]
logDebug $ "Reading metadata from file: " <> metadataFilePath
liftAff (FS.readJsonFile Metadata.codec metadataFilePath) >>= case _ of
metadataFromFile name >>= case _ of
Left e -> pure (Left e)
Right m -> do
-- and memoize it
liftEffect (Ref.write (Map.insert name m metadataMap) metadataRef)
liftEffect (Db.insertMetadata db name m)
pure (Right m)

{ logOptions } <- ask
-- we keep track of how old the latest pull was - if the last pull was recent enough
-- we just move on, otherwise run the fibers
whenM shouldFetchRegistryRepos do
-- clone the registry and index repo, or update them
logInfo "Refreshing the Registry Index..."
runSpago { logOptions, git, offline } $ parallelise
[ Git.fetchRepo { git: "https://github.com/purescript/registry-index.git", ref: "main" } Paths.registryIndexPath >>= case _ of
Right _ -> pure unit
Left _err -> logWarn "Couldn't refresh the registry-index, will proceed anyways"
, Git.fetchRepo { git: "https://github.com/purescript/registry.git", ref: "main" } Paths.registryPath >>= case _ of
Right _ -> pure unit
Left _err -> logWarn "Couldn't refresh the registry, will proceed anyways"
]

-- Now that we are up to date with the Registry we init/refresh the database
db <- liftEffect $ Db.connect
{ database: Paths.databasePath
, logger: \str -> Reader.runReaderT (logDebug $ "DB: " <> str) { logOptions }
}
Registry.updatePackageSetsDb db

pure
{ getManifestFromIndex
, getMetadata
Expand Down Expand Up @@ -1020,32 +1030,4 @@ mkDocsEnv args dependencies = do
, open: args.open
}

-- | Decides whether the Registry repositories should be fetched again, using the
-- | modification time of a canary file under `Paths.globalCachePath` as the record
-- | of when the last fetch happened.
-- |
-- | Yields `true` (after touching the canary) when the canary cannot be stat-ed, or
-- | when its `mtime` is more than 15 minutes in the past; yields `false` otherwise.
shouldFetchRegistryRepos :: forall a. Spago (LogEnv a) Boolean
shouldFetchRegistryRepos =
  FS.stat canaryPath >>= case _ of
    -- A failing stat most likely means the canary file was never created,
    -- so we create it and ask for a fetch
    Left statError -> do
      logDebug [ "Could not stat " <> canaryPath, show statError ]
      touch canaryPath
      pure true
    -- The canary exists: it is stale once `now` is past `mtime` plus the allowed age
    Right (Stats { mtime }) -> do
      now <- liftEffect $ JSDate.now
      let expiresAt = JSDate.getTime mtime + staleAfterMillis
      if JSDate.getTime now > expiresAt then do
        logDebug "Registry index is old, refreshing canary"
        touch canaryPath
        pure true
      else do
        logDebug "Registry index is fresh enough, moving on..."
        pure false
  where
  canaryPath = Path.concat [ Paths.globalCachePath, "fresh-registry-canary.txt" ]

  minutes = 15.0

  -- `JSDate.getTime` values are compared in milliseconds, so convert the minutes
  staleAfterMillis = 1000.0 * 60.0 * minutes

  -- Make sure the file exists, then (re)write it empty
  touch filePath = do
    FS.ensureFileSync filePath
    FS.writeTextFile filePath ""

foreign import supportsColor :: Effect Boolean
84 changes: 56 additions & 28 deletions src/Spago/Db.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ export const connectImpl = (path, logger) => {
logger("Connecting to database at " + path);
let db = new Database(path, {
fileMustExist: false,
verbose: logger,
// verbose: logger,
});
db.pragma("journal_mode = WAL");
db.pragma("foreign_keys = ON");
Expand All @@ -19,16 +19,24 @@ export const connectImpl = (path, logger) => {
, packageName TEXT NOT NULL
, packageVersion TEXT NOT NULL
, PRIMARY KEY (packageSetVersion, packageName, packageVersion)
, FOREIGN KEY (packageSetVersion) REFERENCES package_sets(version))`).run();
// TODO: this is here as a placeholder, but not settled yet
// db.prepare(`CREATE TABLE IF NOT EXISTS package_versions
// ( name TEXT NOT NULL
// , version TEXT NOT NULL
// , published INTEGER NOT NULL
// , date TEXT NOT NULL
// , manifest TEXT NOT NULL
// , location TEXT NOT NULL
// , PRIMARY KEY (name, version))`).run();
, FOREIGN KEY (packageSetVersion) REFERENCES package_sets(version)
)`).run();
db.prepare(`CREATE TABLE IF NOT EXISTS last_git_pull
( key TEXT PRIMARY KEY NOT NULL
, date TEXT NOT NULL
)`).run();
db.prepare(`CREATE TABLE IF NOT EXISTS package_metadata
( name TEXT PRIMARY KEY NOT NULL
, metadata TEXT NOT NULL
)`).run();
// it would be lovely if we'd have a foreign key on package_metadata, but that would
// require reading metadatas before manifests, which we can't always guarantee
db.prepare(`CREATE TABLE IF NOT EXISTS package_manifests
( name TEXT NOT NULL
, version TEXT NOT NULL
, manifest TEXT NOT NULL
, PRIMARY KEY (name, version)
)`).run();
return db;
};

Expand All @@ -38,12 +46,6 @@ export const insertPackageSetImpl = (db, packageSet) => {
).run(packageSet);
};

/**
 * Insert one row into the `package_versions` table.
 *
 * @param db database handle, as returned by `connectImpl`
 * @param packageVersion object providing the named parameters of the statement
 *   (`name`, `version`, `published`, `date`, `manifest`, `location`)
 */
export const insertPackageVersionImpl = (db, packageVersion) => {
  const insertStatement = db.prepare(
    "INSERT INTO package_versions (name, version, published, date, manifest, location) VALUES (@name, @version, @published, @date, @manifest, @location)"
  );
  insertStatement.run(packageVersion);
};

export const insertPackageSetEntryImpl = (db, packageSetEntry) => {
db.prepare(
"INSERT INTO package_set_entries (packageSetVersion, packageName, packageVersion) VALUES (@packageSetVersion, @packageName, @packageVersion)"
Expand All @@ -64,17 +66,6 @@ export const selectPackageSetsImpl = (db) => {
return row;
}

/**
 * Look up a single row of `package_versions` by package name and version.
 *
 * @param db database handle, as returned by `connectImpl`
 * @param name package name to match
 * @param version package version to match
 * @returns whatever the statement's `get` yields for the matching row
 */
export const selectPackageVersionImpl = (db, name, version) => {
  const lookup = db.prepare("SELECT * FROM package_versions WHERE name = ? AND version = ? LIMIT 1");
  return lookup.get(name, version);
};

/**
 * Mark a package version as unpublished by setting `published = 0` on its
 * `package_versions` row.
 *
 * @param db database handle, as returned by `connectImpl`
 * @param name package name to match
 * @param version package version to match
 */
export const unpublishPackageVersionImpl = (db, name, version) => {
  const unpublish = db.prepare("UPDATE package_versions SET published = 0 WHERE name = ? AND version = ?");
  unpublish.run(name, version);
};

export const selectPackageSetEntriesBySetImpl = (db, packageSetVersion) => {
const row = db
.prepare("SELECT * FROM package_set_entries WHERE packageSetVersion = ?")
Expand All @@ -88,3 +79,40 @@ export const selectPackageSetEntriesByPackageImpl = (db, packageName, packageVer
.all(packageName, packageVersion);
return row;
}

/**
 * Read the stored date of the last git pull recorded under `key`.
 *
 * @param db database handle, as returned by `connectImpl`
 * @param key primary key of the `last_git_pull` row to look up
 * @returns the row's `date` value, or `undefined` when there is no such row
 */
export const getLastPullImpl = (db, key) => {
  const lookup = db.prepare("SELECT * FROM last_git_pull WHERE key = ? LIMIT 1");
  const found = lookup.get(key);
  // a missing row must surface as `undefined`, never as a property-access error
  if (found === undefined || found === null) {
    return undefined;
  }
  return found.date;
};

/**
 * Record `date` as the last git pull for `key`, replacing any existing row
 * with the same key (`INSERT OR REPLACE`).
 *
 * @param db database handle, as returned by `connectImpl`
 * @param key primary key of the `last_git_pull` row
 * @param date value to store in the `date` column
 */
export const updateLastPullImpl = (db, key, date) => {
  const upsert = db.prepare("INSERT OR REPLACE INTO last_git_pull (key, date) VALUES (@key, @date)");
  const params = { key: key, date: date };
  upsert.run(params);
};

/**
 * Read the stored manifest for one package version.
 *
 * @param db database handle, as returned by `connectImpl`
 * @param name package name to match
 * @param version package version to match
 * @returns the row's `manifest` value, or `undefined` when there is no such row
 */
export const getManifestImpl = (db, name, version) => {
  const lookup = db.prepare("SELECT * FROM package_manifests WHERE name = ? AND version = ? LIMIT 1");
  const found = lookup.get(name, version);
  // a missing row must surface as `undefined`, never as a property-access error
  if (found === undefined || found === null) {
    return undefined;
  }
  return found.manifest;
};

/**
 * Store the manifest of one package version. The statement is
 * `INSERT OR IGNORE`, so an existing (name, version) row is left untouched.
 *
 * @param db database handle, as returned by `connectImpl`
 * @param name package name
 * @param version package version
 * @param manifest value to store in the `manifest` column
 */
export const insertManifestImpl = (db, name, version, manifest) => {
  const insertStatement = db.prepare(
    "INSERT OR IGNORE INTO package_manifests (name, version, manifest) VALUES (@name, @version, @manifest)"
  );
  const params = { name: name, version: version, manifest: manifest };
  insertStatement.run(params);
};

/**
 * Delete the stored manifest row of one package version, if any.
 *
 * @param db database handle, as returned by `connectImpl`
 * @param name package name to match
 * @param version package version to match
 */
export const removeManifestImpl = (db, name, version) => {
  const removal = db.prepare("DELETE FROM package_manifests WHERE name = ? AND version = ?");
  removal.run(name, version);
};

/**
 * Read the stored metadata of a package.
 *
 * @param db database handle, as returned by `connectImpl`
 * @param name package name (primary key of `package_metadata`)
 * @returns the row's `metadata` value, or `undefined` when there is no such row
 */
export const getMetadataImpl = (db, name) => {
  const lookup = db.prepare("SELECT * FROM package_metadata WHERE name = ? LIMIT 1");
  const found = lookup.get(name);
  // a missing row must surface as `undefined`, never as a property-access error
  if (found === undefined || found === null) {
    return undefined;
  }
  return found.metadata;
};

/**
 * Store the metadata of a package, replacing any existing row with the same
 * name (`INSERT OR REPLACE`).
 *
 * @param db database handle, as returned by `connectImpl`
 * @param name package name (primary key of `package_metadata`)
 * @param metadata value to store in the `metadata` column
 */
export const insertMetadataImpl = (db, name, metadata) => {
  const upsert = db.prepare("INSERT OR REPLACE INTO package_metadata (name, metadata) VALUES (@name, @metadata)");
  const params = { name: name, metadata: metadata };
  upsert.run(params);
};
Loading

0 comments on commit c9fd17b

Please sign in to comment.