From 795198d344e5122a513d6c5e6fbb6c3accea84f4 Mon Sep 17 00:00:00 2001 From: Philip Sakievich Date: Thu, 23 May 2024 14:06:31 -0600 Subject: [PATCH] Add a single entry point python file for fs (depends on spack-python) --- images/ci-prune-buildcache/buildcache.py | 3 +- .../ci-prune-buildcache/buildcache_query.py | 35 ++++ images/ci-prune-buildcache/cache-prune.py | 165 ++++++++++++++++++ images/ci-prune-buildcache/fs_buildcache.py | 5 +- images/ci-prune-buildcache/pruner.py | 60 +++++-- 5 files changed, 249 insertions(+), 19 deletions(-) create mode 100755 images/ci-prune-buildcache/buildcache_query.py create mode 100755 images/ci-prune-buildcache/cache-prune.py diff --git a/images/ci-prune-buildcache/buildcache.py b/images/ci-prune-buildcache/buildcache.py index 0858c3df0..550bb0536 100644 --- a/images/ci-prune-buildcache/buildcache.py +++ b/images/ci-prune-buildcache/buildcache.py @@ -6,9 +6,10 @@ class Object: - def __init__(self, bucket_name: str, key: str, last_modified): + def __init__(self, bucket_name: str, key: str, last_modified, size = 0): self.bucket_name = bucket_name self.key = key + self.size = size if isinstance(last_modified, datetime): self.last_modified = last_modified else: diff --git a/images/ci-prune-buildcache/buildcache_query.py b/images/ci-prune-buildcache/buildcache_query.py new file mode 100755 index 000000000..84526499e --- /dev/null +++ b/images/ci-prune-buildcache/buildcache_query.py @@ -0,0 +1,35 @@ +#!/usr/bin/env spack-python + +# copy of https://github.com/sandialabs/spack-manager/blob/main/manager/manager_cmds/cache_query.py +# as a stand alone script +# query the buildcache like `spack find` + +import argparse + +import spack.binary_distribution as bindist +import spack.cmd as cmd +import spack.cmd.find + + +parser = argparse.ArgumentParser() +spack.cmd.find.setup_parser(parser) + +def cache_search(self, **kwargs): + qspecs = spack.cmd.parse_specs(self.values) + search_engine = bindist.BinaryCacheQuery(True) + results = {} + for 
q in qspecs: + hits = search_engine(str(q), **kwargs) + for hit in hits: + results[hit.dag_hash()] = hit + return sorted(results.values()) + +spack.cmd.common.arguments.ConstraintAction._specs = cache_search + +def find(parser, args): + spack.cmd.find.find(parser, args) + +if __name__ == "__main__": + args = parser.parse_args() + find(parser, args) + diff --git a/images/ci-prune-buildcache/cache-prune.py b/images/ci-prune-buildcache/cache-prune.py new file mode 100755 index 000000000..e8242a475 --- /dev/null +++ b/images/ci-prune-buildcache/cache-prune.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python3 + +import argparse +import helper +import math +import os +import subprocess + +from datetime import datetime, timedelta, timezone +from fs_buildcache import FileSystemBuildCache +from pruner import pruner_factory, PRUNER_TYPES + +def convert_size(size_bytes): + if size_bytes == 0: + return "0B" + size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB") + i = int(math.floor(math.log(size_bytes, 1024))) + p = math.pow(1024, i) + size = round(size_bytes / p, 2) + return f"{size} {size_name[i]}" + +def configure_parser(): + parser = argparse.ArgumentParser() + parser.add_argument( + "path", + help="location of the buildcache", + ) + parser.add_argument( + "--start-date", + help="Starting date for pruning window", + default=datetime.now(timezone.utc).isoformat(), + ) + parser.add_argument( + "--since-days", + help="Ending date for pruning window", + type=int, + default=0 + ) + parser.add_argument( + "-j", "--nprocs", + help="Numer of process to use", + type=int, + metavar="N", + default=1 + ) + parser.add_argument( + "--prune-hashes", + help="json file with hash list to prune", + type=argparse.FileType("r"), + metavar="prune.json", + ) + parser.add_argument( + "--keep-hashes", + help="json file with hash list to keep", + type=argparse.FileType("r"), + metavar="keep.json", + ) + parser.add_argument( + "--keep-specs", + help="specs to preserve in the cache (includes 
dependencies)", + nargs="+", + ) + parser.add_argument( + "-o", "--output-dir", + default=os.getcwd(), + help="output directory", + ) + parser.add_argument( + "-S", "--suffix", + help="logging file suffix", + ) + parser.add_argument( + "-D", "--delete", + help="attempt to delete the files", + action="store_true", + ) + parser.add_argument( + "-m", "--method", + help="pruning method to use on the cache", + choices = list(PRUNER_TYPES.keys()), + default = "direct", + ) + + return parser + + +def get_cache_hashes_from_specs(*args, **kwargs): + command = ['spack-python', 'buildcache_query.py', '--format', '{hash}'] + command.extend([*args]) + result = subprocess.check_output(command, universal_newlines=True).strip().split() + return result + +def get_keep_hashes(args: argparse.Namespace): + keep_hashes=[] + if args.keep_hashes: + keep_hashes.extend(helper.load_json(args.keep_hashes)) + if args.keep_specs: + keep_hashes.extend(get_cache_hashes_from_specs("--deps", *args.keep_specs)) + return keep_hashes + +if __name__=="__main__": + args = configure_parser().parse_args() + + os.makedirs(args.output_dir, exist_ok=True) + + + if not args.suffix: + log_suffix = "_" + args.method + else: + log_suffix = args.suffix + + keep_hashes=get_keep_hashes(args) + + cache = FileSystemBuildCache(args.path) + + now = datetime.fromisoformat(args.start_date) + time_window = now - timedelta(days=args.since_days) + + # combine start date and delta for passing to pruners + args.start_date = time_window + + pruner = pruner_factory(cache, args.method, args, keep_hashes, since=time_window) + + print("-- Computing prunable hashes") + prunable_hashes = [] + if args.prune_hashes: + prunable_hashes.extend( helper.load_json(args.prune_hashes)) + else: + prunable_hashes.extend(pruner.determine_prunable_hashes()) + + prune_hash_file = f"{args.output_dir}/prunable-hashes-{log_suffix}.txt" + with open(f"{prune_hash_file}", "w") as fd: + fd.writelines("\n".join(prunable_hashes)) + + if prunable_hashes: + 
print("-- Finding prunable files") + + pruned = pruner.prune(prunable_hashes) + + pruned_keys = [ obj.key for obj in pruned ] + + print(f"-- Found prunable {len(pruned)} files in buildcache") + total_size_human = convert_size(sum(obj.size for obj in pruned)) + print(f"-- Total Size of prunable files is {total_size_human}") + + prune_list_file = f"{args.output_dir}/prunable-files-{log_suffix}.txt" + with open(f"{prune_list_file}", "w") as fd: + fd.writelines("\n".join(pruned_keys)) + else: + print("-- Nothing to prune") + + if args.delete: + print("-- Pruning build cache") + err, fail = cache.delete(pruned_keys, processes=args.nprocs) + fname_template = f"{args.output_dir}/delete-{{0}}-{log_suffix}.json" + if err: + print(f"errors found") + with open(fname_template.format("errors")) as fd: + helper.write_json(fd, err) + + if fail: + print(f"failures found") + with open(fname_template.format("failures")) as fd: + helper.write_json(fd, fail) + diff --git a/images/ci-prune-buildcache/fs_buildcache.py b/images/ci-prune-buildcache/fs_buildcache.py index 78ca9b13b..ab4046973 100644 --- a/images/ci-prune-buildcache/fs_buildcache.py +++ b/images/ci-prune-buildcache/fs_buildcache.py @@ -10,8 +10,9 @@ class FileSystemObject(Object): def __init__(self, entry: os.DirEntry): - lm = datetime.fromtimestamp(entry.stat().st_mtime) - super().__init__(bucket_name=None, key=entry.path, last_modified = lm) + stat = entry.stat() + lm = datetime.fromtimestamp(stat.st_mtime) + super().__init__(bucket_name=None, key=entry.path, last_modified = lm, size = stat.st_size) if entry.is_file(): self._get_method = self._get_file elif entry.is_dir(): diff --git a/images/ci-prune-buildcache/pruner.py b/images/ci-prune-buildcache/pruner.py index 23ccf36c4..57aab66b9 100644 --- a/images/ci-prune-buildcache/pruner.py +++ b/images/ci-prune-buildcache/pruner.py @@ -1,9 +1,19 @@ from buildcache import BuildCache, Object from datetime import datetime, timedelta, timezone +import argparse import helper 
import multiprocessing.pool as pool +CLI_ARGS_DICT = { + "start_date": None, + "delete": False, + "nprocs": 1, +} + +DEFAULT_CLI_ARGS = argparse.Namespace() +DEFAULT_CLI_ARGS.__dict__.update(CLI_ARGS_DICT) + class PrunedObject(Object): def __init__(self, obj: Object, method: str): self.__dict__.update(obj.__dict__) @@ -14,7 +24,7 @@ class BasePruner: spec_ext = (".spec.json", ".spec.yaml", ".spec.json.sig") tarball_ext = (".spack", ".tar.gz") - def __init__(self, buildcache: BuildCache, keep_hash_list, cli_args): + def __init__(self, buildcache: BuildCache, keep_hash_list, cli_args=DEFAULT_CLI_ARGS): self.buildcache = buildcache self.keep_hashes = keep_hash_list self.cli_args = cli_args @@ -22,7 +32,10 @@ def __init__(self, buildcache: BuildCache, keep_hash_list, cli_args): self.prunable_hashes = set() self.prune_ext = self.spec_ext + self.tarball_ext - self.start_date = datetime.fromisoformat(cli_args.start_date) + if isinstance(cli_args.start_date, datetime): + self.start_date = cli_args.start_date + else: + self.start_date = datetime.fromisoformat(cli_args.start_date) self.enable_delete = self.cli_args.delete @@ -72,7 +85,7 @@ def _list(self, ext=None, wrapped=True): else: yield obj - def prune(self, prunable_hashes=None): + def prune(self, prunable_hashes=None, compute_size=False): """ Prune the buildcache """ # Get the list of prunable hashes @@ -191,7 +204,7 @@ class OrphanPruner(BasePruner): """Pruning Strategy that looks for .spack binaries with no matching spec.json buildcache """ - def __init__(self, buildcache: BuildCache, date_cutoff: datetime, cli_args): + def __init__(self, buildcache: BuildCache, date_cutoff: datetime, cli_args=DEFAULT_CLI_ARGS): BasePruner.__init__(self, buildcache, None, cli_args) self.date_cutoff = datetime.fromisoformat(cli_args.start_date) @@ -258,20 +271,35 @@ def a_not_in_b(a, b): return self.prunable_hashes +PRUNER_TYPES = { + "direct": DirectPruner, + "orphan": OrphanPruner, + "index": IndexPruner, + } -def 
pruner_factory(cache, args, keep_hashes=[], since=None): +def pruner_factory(cache, method, args=DEFAULT_CLI_ARGS, keep_hashes=[], since=None): """ Factory with variable args a kwargs """ - # make sure only one type was supplied - type_sum = int(args.direct) + int(args.orphaned) + int(args.check_index) - assert type_sum == 1 - - if args.direct: - return DirectPruner(cache, keep_hashes, args) - elif args.orphaned: - return OrphanPruner(cache, since, args) - elif args.check_index: - return IndexPruner(cache, keep_hashes, args) + obj = PRUNER_TYPES.get(method, None) + + # checks that the arguments passed meet the needs of the pruner objects + new_args = argparse.Namespace() + new_args.__dict__.update(CLI_ARGS_DICT) + # Update the first namespace with values from the second namespace + for key, value in args.__dict__.items(): + if key in new_args.__dict__ and key in args.__dict__: + new_args.__dict__[key] = value + elif key not in new_args.__dict__: + continue + else: + raise Exception(f"Missing {key} in the arguments passed to the pruner factory") + + + + if method=="direct" or method=="index": + return obj(cache, keep_hashes, new_args) + elif method=="orphan": + return obj(cache, since, new_args) else: - raise Exception("Pruner type not implemented") + raise Exception(f"Pruner {method} type not implemented")