Skip to content

Commit

Permalink
Add a single entry point python file for fs (depends on spack-python)
Browse files Browse the repository at this point in the history
  • Loading branch information
psakievich committed May 23, 2024
1 parent 69a356a commit 795198d
Show file tree
Hide file tree
Showing 5 changed files with 249 additions and 19 deletions.
3 changes: 2 additions & 1 deletion images/ci-prune-buildcache/buildcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@


class Object:
def __init__(self, bucket_name: str, key: str, last_modified):
def __init__(self, bucket_name: str, key: str, last_modified, size = 0):
    # Generic buildcache object record shared by the S3 and filesystem
    # backends (bucket_name is None for filesystem objects — see
    # fs_buildcache.FileSystemObject).
    self.bucket_name = bucket_name
    self.key = key
    # Size in bytes; defaults to 0 for callers that do not track size.
    self.size = size
    # Accept a ready-made datetime directly; otherwise fall through to
    # the parsing branch below (body not visible in this view).
    if isinstance(last_modified, datetime):
        self.last_modified = last_modified
    else:
Expand Down
35 changes: 35 additions & 0 deletions images/ci-prune-buildcache/buildcache_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env spack-python

# copy of https://github.com/sandialabs/spack-manager/blob/main/manager/manager_cmds/cache_query.py
# as a standalone script
# query the buildcache like `spack find`

import argparse

import spack.binary_distribution as bindist
import spack.cmd as cmd
import spack.cmd.find


parser = argparse.ArgumentParser()
# Reuse spack's own `find` command argument definitions so this script
# accepts the same flags/format options as `spack find`.
spack.cmd.find.setup_parser(parser)

def cache_search(self, **kwargs):
    """Search the binary cache for the specs held in ``self.values``.

    Installed as a replacement for ``ConstraintAction._specs`` (see the
    assignment below), so ``self`` is the argparse action instance and
    ``self.values`` carries the raw spec strings from the command line.
    Returns the matching concrete specs, deduplicated by DAG hash and
    sorted.
    """
    requested = spack.cmd.parse_specs(self.values)
    engine = bindist.BinaryCacheQuery(True)
    found = {}
    for spec in requested:
        for hit in engine(str(spec), **kwargs):
            found[hit.dag_hash()] = hit
    return sorted(found.values())

spack.cmd.common.arguments.ConstraintAction._specs = cache_search

def find(parser, args):
    """Run spack's builtin ``find`` implementation with the given args.

    Because ConstraintAction._specs is patched above, the results come
    from the buildcache rather than the local installs.
    """
    spack.cmd.find.find(parser, args)

if __name__ == "__main__":
    # Parse `spack find`-style arguments and run the buildcache query.
    args = parser.parse_args()
    find(parser, args)

165 changes: 165 additions & 0 deletions images/ci-prune-buildcache/cache-prune.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
#!/usr/bin/env python3

import argparse
import helper
import math
import os
import subprocess

from datetime import datetime, timedelta, timezone
from fs_buildcache import FileSystemBuildCache
from pruner import pruner_factory, PRUNER_TYPES

def convert_size(size_bytes):
    """Render a byte count as a human-readable string, e.g. ``1.5 KB``.

    Zero is special-cased as ``"0B"`` (no space); all other values are
    scaled to the largest power-of-1024 unit and rounded to two decimals.
    """
    if size_bytes == 0:
        return "0B"
    units = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    exponent = int(math.floor(math.log(size_bytes, 1024)))
    scaled = round(size_bytes / math.pow(1024, exponent), 2)
    return f"{scaled} {units[exponent]}"

def configure_parser():
    """Build the command-line parser for the buildcache pruning script.

    Returns:
        argparse.ArgumentParser: parser accepting the cache path plus
        pruning-window, hash-list, output and deletion options.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "path",
        help="location of the buildcache",
    )
    parser.add_argument(
        "--start-date",
        help="Starting date for pruning window (ISO-8601); defaults to now (UTC)",
        default=datetime.now(timezone.utc).isoformat(),
    )
    parser.add_argument(
        "--since-days",
        # BUG FIX: old help said "Ending date" but the value is an
        # integer number of days subtracted from --start-date.
        help="Number of days before the start date that ends the pruning window",
        type=int,
        default=0,
    )
    parser.add_argument(
        "-j", "--nprocs",
        # BUG FIX: help typo "Numer of process"
        help="Number of processes to use",
        type=int,
        metavar="N",
        default=1,
    )
    parser.add_argument(
        "--prune-hashes",
        help="json file with hash list to prune",
        type=argparse.FileType("r"),
        metavar="prune.json",
    )
    parser.add_argument(
        "--keep-hashes",
        help="json file with hash list to keep",
        type=argparse.FileType("r"),
        metavar="keep.json",
    )
    parser.add_argument(
        "--keep-specs",
        help="specs to preserve in the cache (includes dependencies)",
        nargs="+",
    )
    parser.add_argument(
        "-o", "--output-dir",
        default=os.getcwd(),
        help="output directory",
    )
    parser.add_argument(
        "-S", "--suffix",
        help="logging file suffix",
    )
    parser.add_argument(
        "-D", "--delete",
        help="attempt to delete the files",
        action="store_true",
    )
    parser.add_argument(
        "-m", "--method",
        help="pruning method to use on the cache",
        choices=list(PRUNER_TYPES.keys()),
        default="direct",
    )

    return parser


def get_cache_hashes_from_specs(*args, **kwargs):
    """Query the buildcache for spec hashes via the spack-python helper.

    Any positional *args* are forwarded verbatim to buildcache_query.py
    (e.g. "--deps" plus spec strings).  Returns the list of hash strings
    printed by the helper, one element per whitespace-separated token.
    """
    query = ["spack-python", "buildcache_query.py", "--format", "{hash}"]
    query += list(args)
    output = subprocess.check_output(query, universal_newlines=True)
    return output.strip().split()

def get_keep_hashes(args: argparse.Namespace):
    """Collect the hashes that must survive pruning.

    Combines the explicit list from --keep-hashes (JSON file) with the
    hashes of --keep-specs and all of their dependencies, as resolved by
    the buildcache query helper.
    """
    hashes = []
    if args.keep_hashes:
        hashes += helper.load_json(args.keep_hashes)
    if args.keep_specs:
        hashes += get_cache_hashes_from_specs("--deps", *args.keep_specs)
    return hashes

if __name__=="__main__":
    args = configure_parser().parse_args()

    os.makedirs(args.output_dir, exist_ok=True)

    # Suffix used in every report/log filename; defaults to the method.
    if not args.suffix:
        log_suffix = "_" + args.method
    else:
        log_suffix = args.suffix

    keep_hashes = get_keep_hashes(args)

    cache = FileSystemBuildCache(args.path)

    now = datetime.fromisoformat(args.start_date)
    time_window = now - timedelta(days=args.since_days)

    # combine start date and delta for passing to pruners
    args.start_date = time_window

    pruner = pruner_factory(cache, args.method, args, keep_hashes, since=time_window)

    print("-- Computing prunable hashes")
    prunable_hashes = []
    if args.prune_hashes:
        prunable_hashes.extend(helper.load_json(args.prune_hashes))
    else:
        prunable_hashes.extend(pruner.determine_prunable_hashes())

    prune_hash_file = f"{args.output_dir}/prunable-hashes-{log_suffix}.txt"
    with open(prune_hash_file, "w") as fd:
        fd.writelines("\n".join(prunable_hashes))

    # BUG FIX: initialize before the branch so the --delete block below
    # cannot hit a NameError when there is nothing to prune.
    pruned_keys = []
    if prunable_hashes:
        print("-- Finding prunable files")

        pruned = pruner.prune(prunable_hashes)
        pruned_keys = [obj.key for obj in pruned]

        print(f"-- Found prunable {len(pruned)} files in buildcache")
        total_size_human = convert_size(sum(obj.size for obj in pruned))
        print(f"-- Total Size of prunable files is {total_size_human}")

        prune_list_file = f"{args.output_dir}/prunable-files-{log_suffix}.txt"
        with open(prune_list_file, "w") as fd:
            fd.writelines("\n".join(pruned_keys))
    else:
        print("-- Nothing to prune")

    if args.delete:
        print("-- Pruning build cache")
        err, fail = cache.delete(pruned_keys, processes=args.nprocs)
        fname_template = f"{args.output_dir}/delete-{{0}}-{log_suffix}.json"
        if err:
            print("errors found")
            # BUG FIX: the report files must be opened for writing;
            # the old code used the default read mode.
            with open(fname_template.format("errors"), "w") as fd:
                helper.write_json(fd, err)

        if fail:
            print("failures found")
            with open(fname_template.format("failures"), "w") as fd:
                helper.write_json(fd, fail)

5 changes: 3 additions & 2 deletions images/ci-prune-buildcache/fs_buildcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@

class FileSystemObject(Object):
def __init__(self, entry: os.DirEntry):
    # Stat the entry once and reuse the result for both mtime and size.
    stat = entry.stat()
    lm = datetime.fromtimestamp(stat.st_mtime)
    super().__init__(bucket_name=None, key=entry.path, last_modified = lm, size = stat.st_size)
    # Pick the fetch strategy based on the entry type (directory branch
    # continues past this view).
    if entry.is_file():
        self._get_method = self._get_file
    elif entry.is_dir():
Expand Down
60 changes: 44 additions & 16 deletions images/ci-prune-buildcache/pruner.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
from buildcache import BuildCache, Object
from datetime import datetime, timedelta, timezone
import argparse
import helper
import multiprocessing.pool as pool


# Minimal set of CLI options every pruner reads, with safe defaults.
# pruner_factory uses this to normalize whatever namespace a caller
# passes in.
CLI_ARGS_DICT = {
    "start_date": None,
    "delete": False,
    "nprocs": 1,
}

# Default namespace for programmatic callers that supply no CLI args.
DEFAULT_CLI_ARGS = argparse.Namespace(**CLI_ARGS_DICT)

class PrunedObject(Object):
def __init__(self, obj: Object, method: str):
self.__dict__.update(obj.__dict__)
Expand All @@ -14,15 +24,18 @@ class BasePruner:
spec_ext = (".spec.json", ".spec.yaml", ".spec.json.sig")
tarball_ext = (".spack", ".tar.gz")

def __init__(self, buildcache: BuildCache, keep_hash_list, cli_args):
def __init__(self, buildcache: BuildCache, keep_hash_list, cli_args=DEFAULT_CLI_ARGS):
    # Common pruner state: the cache to operate on, the hashes that must
    # be preserved, and the (possibly defaulted) CLI options.
    self.buildcache = buildcache
    self.keep_hashes = keep_hash_list
    self.cli_args = cli_args

    self.prunable_hashes = set()
    # Both spec metadata files and binary tarballs are prune candidates.
    self.prune_ext = self.spec_ext + self.tarball_ext

    # start_date may arrive as a datetime (new programmatic callers) or
    # as an ISO-8601 string (original CLI path).
    if isinstance(cli_args.start_date, datetime):
        self.start_date = cli_args.start_date
    else:
        self.start_date = datetime.fromisoformat(cli_args.start_date)

    self.enable_delete = self.cli_args.delete

Expand Down Expand Up @@ -72,7 +85,7 @@ def _list(self, ext=None, wrapped=True):
else:
yield obj

def prune(self, prunable_hashes=None):
def prune(self, prunable_hashes=None, compute_size=False):
""" Prune the buildcache
"""
# Get the list of prunable hashes
Expand Down Expand Up @@ -191,7 +204,7 @@ class OrphanPruner(BasePruner):
"""Pruning Strategy that looks for .spack binaries with no matching spec.json
buildcache
"""
def __init__(self, buildcache: BuildCache, date_cutoff: datetime, cli_args):
def __init__(self, buildcache: BuildCache, date_cutoff: datetime, cli_args=DEFAULT_CLI_ARGS):
    """Initialize the orphan pruner with its modification-time cutoff.

    BUG FIX: previously *date_cutoff* was ignored and
    ``datetime.fromisoformat(cli_args.start_date)`` was always called,
    which raises TypeError when callers (e.g. pruner_factory /
    cache-prune.py) pass an already-parsed datetime.
    """
    BasePruner.__init__(self, buildcache, None, cli_args)
    if isinstance(date_cutoff, datetime):
        self.date_cutoff = date_cutoff
    else:
        # Preserve the legacy fallback: parse the CLI-supplied ISO string.
        self.date_cutoff = datetime.fromisoformat(cli_args.start_date)

Expand Down Expand Up @@ -258,20 +271,35 @@ def a_not_in_b(a, b):

return self.prunable_hashes

# Registry mapping the CLI --method choice to its pruner class; used by
# pruner_factory and by the CLI's choices= list.
PRUNER_TYPES = {
    "direct": DirectPruner,
    "orphan": OrphanPruner,
    "index": IndexPruner,
}

def pruner_factory(cache, args, keep_hashes=[], since=None):
def pruner_factory(cache, method, args=DEFAULT_CLI_ARGS, keep_hashes=None, since=None):
    """Construct the pruner registered for *method*.

    Args:
        cache: the BuildCache to prune.
        method: one of PRUNER_TYPES' keys ("direct", "orphan", "index").
        args: namespace of CLI options; only the keys listed in
            CLI_ARGS_DICT are consumed, extras are ignored.
        keep_hashes: hashes to preserve (direct/index pruners).
        since: datetime cutoff (orphan pruner).

    Raises:
        Exception: if *method* is not a known pruner type.
    """
    # BUG FIX: avoid a mutable [] default argument.
    if keep_hashes is None:
        keep_hashes = []

    pruner_cls = PRUNER_TYPES.get(method, None)

    # Normalize the caller's namespace down to exactly the options the
    # pruners expect, seeded with safe defaults.  (The old loop had an
    # unreachable raise: iterating args.__dict__ guarantees each key is
    # present in it.)
    new_args = argparse.Namespace(**CLI_ARGS_DICT)
    for key, value in vars(args).items():
        if key in new_args.__dict__:
            setattr(new_args, key, value)

    if method in ("direct", "index"):
        return pruner_cls(cache, keep_hashes, new_args)
    if method == "orphan":
        return pruner_cls(cache, since, new_args)
    raise Exception(f"Pruner {method} type not implemented")


0 comments on commit 795198d

Please sign in to comment.