From afb8cb9d8ec438299e3f3911724b991c96762661 Mon Sep 17 00:00:00 2001 From: Ilija Vukotic Date: Fri, 7 Jun 2024 08:23:37 +0200 Subject: [PATCH] large --- .github/workflows/deploy-config.json | 6 +- cleanup/Dockerfile | 11 + cleanup/README.md | 3 + cleanup/requirements.txt | 3 + cleanup/resources/cleanup_batch.yaml | 60 +++++ cleanup/resources/start.sh | 2 + .../scripts/cleanup.py | 124 +++------ cleanup/servicex_storage/__init__.py | 0 cleanup/servicex_storage/db_manager.py | 159 +++++++++++ .../servicex_storage/s3_storage_manager.py | 195 ++++++++++++++ minio_cleanup/Dockerfile | 29 -- minio_cleanup/README.md | 2 - minio_cleanup/poetry.lock | 255 ------------------ minio_cleanup/pyproject.toml | 17 -- minio_cleanup/requirements.txt | 2 - minio_cleanup/resources/cleanup_batch.yaml | 28 -- minio_cleanup/resources/start.sh | 5 - minio_cleanup/servicex_storage/__init__.py | 27 -- .../object_storage_manager.py | 119 -------- .../servicex_storage/s3_storage_manager.py | 235 ---------------- .../tests/test_minio_storage_manager.py | 117 -------- 21 files changed, 474 insertions(+), 925 deletions(-) create mode 100644 cleanup/Dockerfile create mode 100644 cleanup/README.md create mode 100644 cleanup/requirements.txt create mode 100644 cleanup/resources/cleanup_batch.yaml create mode 100644 cleanup/resources/start.sh rename minio_cleanup/scripts/minio_cleanup.py => cleanup/scripts/cleanup.py (54%) mode change 100755 => 100644 create mode 100644 cleanup/servicex_storage/__init__.py create mode 100644 cleanup/servicex_storage/db_manager.py create mode 100644 cleanup/servicex_storage/s3_storage_manager.py delete mode 100644 minio_cleanup/Dockerfile delete mode 100644 minio_cleanup/README.md delete mode 100644 minio_cleanup/poetry.lock delete mode 100644 minio_cleanup/pyproject.toml delete mode 100644 minio_cleanup/requirements.txt delete mode 100644 minio_cleanup/resources/cleanup_batch.yaml delete mode 100644 minio_cleanup/resources/start.sh delete mode 100644 minio_cleanup/servicex_storage/__init__.py delete mode 100644 minio_cleanup/servicex_storage/object_storage_manager.py delete mode 100644 minio_cleanup/servicex_storage/s3_storage_manager.py delete mode 100644 minio_cleanup/tests/test_minio_storage_manager.py diff --git a/.github/workflows/deploy-config.json b/.github/workflows/deploy-config.json index b1fd86d98..7881bb1f4 100644 --- a/.github/workflows/deploy-config.json +++ b/.github/workflows/deploy-config.json @@ -50,8 +50,8 @@ "test_required": true }, { - "dir_name": "minio_cleanup", - "image_name": "servicex_minio_cleanup", + "dir_name": "cleanup", + "image_name": "servicex_cleanup", "test_required": false } -] +] \ No newline at end of file diff --git a/cleanup/Dockerfile b/cleanup/Dockerfile new file mode 100644 index 000000000..709dd11cf --- /dev/null +++ b/cleanup/Dockerfile @@ -0,0 +1,11 @@ +FROM python:3.11 AS builder + +COPY scripts/*.py resources/start.sh requirements.txt ./ +RUN mkdir servicex_storage +COPY servicex_storage/* ./servicex_storage/ + +RUN python3 -m pip install -r requirements.txt + +RUN chmod +x start.sh + +ENTRYPOINT ["./start.sh"] diff --git a/cleanup/README.md b/cleanup/README.md new file mode 100644 index 000000000..bdaa217eb --- /dev/null +++ b/cleanup/README.md @@ -0,0 +1,3 @@ +# Cleanup + +Microservice to cleanup ServiceX database and S3 storage(s) diff --git a/cleanup/requirements.txt b/cleanup/requirements.txt new file mode 100644 index 000000000..4105fe6fe --- /dev/null +++ b/cleanup/requirements.txt @@ -0,0 +1,3 @@ +boto3 +psycopg2 +python-logstash==0.4.8 \ No newline at end of file diff --git a/cleanup/resources/cleanup_batch.yaml b/cleanup/resources/cleanup_batch.yaml new file mode 100644 index 000000000..9c9152161 --- /dev/null +++ b/cleanup/resources/cleanup_batch.yaml @@ -0,0 +1,60 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: cleanup + namespace: servicex +spec: + schedule: "5 * * * *" + concurrencyPolicy: Forbid + jobTemplate: + spec: + template: + spec: + containers: + - name: cleanup + image: ivukotic/servicex_cleanup:develop + imagePullPolicy: Always + env: + - name: S3_URL + value: "http://s3v2.af.uchicago.edu:32095" + - name: PG_PASS + valueFrom: + secretKeyRef: + name: servicex-secrets + key: postgresql-password + - name: ACCESS_KEY + valueFrom: + secretKeyRef: + name: servicex-secrets + key: accesskey + - name: SECRET_KEY + valueFrom: + secretKeyRef: + name: servicex-secrets + key: secretkey + - name: DATABASE_URI + value: >- + postgresql://postgres:$(PG_PASS)@servicex-postgresql:5432/servicex + - name: MAX_S3_AGE + value: "6" + - name: MAX_REQUEST_AGE + value: "30" + - name: MAX_DID_LOOKUP_AGE + value: "30" + - name: HWM + value: "35T" + - name: LWM + value: "30T" + - name: LOGSTASH_HOST + value: servicex.atlas-ml.org + - name: LOGSTASH_PORT + value: "5959" + - name: LOGSTASH_PROTOCOL + value: TCP + - name: LOG_LEVEL + value: INFO + - name: INSTANCE_NAME + value: servicex + - name: PYTHONUNBUFFERED + value: "TRUE" + restartPolicy: Never #OnFailure diff --git a/cleanup/resources/start.sh b/cleanup/resources/start.sh new file mode 100644 index 000000000..fcca6ce8e --- /dev/null +++ b/cleanup/resources/start.sh @@ -0,0 +1,2 @@ +#!/bin/sh +python3.11 ./cleanup.py \ No newline at end of file diff --git a/minio_cleanup/scripts/minio_cleanup.py b/cleanup/scripts/cleanup.py old mode 100755 new mode 100644 similarity index 54% rename from minio_cleanup/scripts/minio_cleanup.py rename to cleanup/scripts/cleanup.py index ef096d2f9..87cde6641 --- a/minio_cleanup/scripts/minio_cleanup.py +++ b/cleanup/scripts/cleanup.py @@ -1,38 +1,11 @@ #!/usr/bin/python3.11 -# Copyright (c) 2019, IRIS-HEP -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import argparse + import logstash import logging import os import sys +from servicex_storage import db_manager from servicex_storage import s3_storage_manager instance = os.environ.get('INSTANCE_NAME', 'Unknown') @@ -74,7 +47,7 @@ def format(self, record): 'tags': self.tags, 'type': self.message_type, 'instance': instance, - 'component': 'minio cleaner', + 'component': 's3 cleaner', # Extra Fields 'level': record.levelname, @@ -101,7 +74,7 @@ def initialize_logging() -> logging.Logger: log = logging.getLogger() formatter = logging.Formatter('%(levelname)s ' + - f"{instance} minio cleaner " + '%(message)s') + f"{instance} s3 cleaner " + '%(message)s') handler = logging.StreamHandler() handler.setFormatter(formatter) handler.setLevel(logging.INFO) @@ -151,23 +124,21 @@ def parse_suffix(size: str) -> int: return int(size) -def run_minio_cleaner(): - """ - Run the minio cleaner - """ +def run_cleaner(): - # Parse the command line arguments - parser = argparse.ArgumentParser() - parser.add_argument('--max-size', dest='max_size', action='store', - default='', - help='Max size allowed before pruning storage') - parser.add_argument('--norm-size', dest='norm_size', action='store', - default='', - help='Size to prune storage to') - parser.add_argument('--max-age', dest='max_age', action='store', - default=30, - type=int, - help='Max age of files in days allowed before pruning storage') + hwm = os.environ.get('HWM', '10T') + lwm = os.environ.get('LWM', '9T') + + try: + raw_hwm = parse_suffix(hwm) + except ValueError: + logger.error(f"Can't parse hwm size, got: {hwm}") + sys.exit(1) + try: + raw_lwm = parse_suffix(lwm) + except ValueError: + logger.error(f"Can't parse lwm size, got: {lwm}") + sys.exit(1) logstash_host = os.environ.get('LOGSTASH_HOST') logstash_port = os.environ.get('LOGSTASH_PORT') @@ -175,7 +146,7 @@ def run_minio_cleaner(): stream_handler = logging.StreamHandler() stream_formatter = StreamFormatter('%(levelname)s ' + - f"{instance} minio_cleanup " + + f"{instance} cleanup " + '%(message)s') stream_handler.setFormatter(stream_formatter) stream_handler.setLevel(level) @@ -188,21 +159,9 @@ def run_minio_cleaner(): logstash_handler.setLevel(level) logger.addHandler(logstash_handler) - args = parser.parse_args() - try: - raw_max = parse_suffix(args.max_size) - except ValueError: - logger.error(f"Can't parse max size, got: {args.max_size}") - sys.exit(1) - try: - raw_norm = parse_suffix(args.norm_size) - except ValueError: - logger.error(f"Can't parse norm size, got: {args.norm_size}") - sys.exit(1) + logger.info("ServiceX S3 Cleaner starting up.") - logger.info("ServiceX Minio Cleaner starting up.") - - env_vars = ['MINIO_URL', 'ACCESS_KEY', 'SECRET_KEY'] + env_vars = ['S3_URL', 'ACCESS_KEY', 'SECRET_KEY', 'DATABASE_URI'] error = False for var in env_vars: if var not in os.environ: @@ -212,32 +171,25 @@ def run_minio_cleaner(): logger.error("Exiting due to missing environment variables") sys.exit(1) - try: - if 'MINIO_ENCRYPT' in os.environ: - if isinstance(os.environ['MINIO_ENCRYPT'], bool): - use_https = os.environ['MINIO_ENCRYPT'] - else: - use_https = strtobool(os.environ['MINIO_ENCRYPT']) + if 'S3_ENCRYPT' in os.environ: + if isinstance(os.environ['S3_ENCRYPT'], bool): + use_https = os.environ['S3_ENCRYPT'] else: - use_https = False - - store = s3_storage_manager.S3Store(s3_endpoint=os.environ['MINIO_URL'], - access_key=os.environ['ACCESS_KEY'], - secret_key=os.environ['SECRET_KEY'], - use_https=use_https) - logger.info("cleanup started") - results = store.cleanup_storage(max_size=raw_max, norm_size=raw_norm, max_age=args.max_age) - logger.info("cleanup stopped finished.", extra={ - "storage used": results[0], - "storage available": raw_max, - "storage HWM": raw_norm}) - for bucket in results[1]: - logger.info(f"Removed folder/bucket: {bucket}") - logger.info("Deleted buckets", extra={'nbuckets': len(results[1])}) - finally: - logger.info('Done running minio storage cleanup') + use_https = strtobool(os.environ['S3_ENCRYPT']) + else: + use_https = False + + store = s3_storage_manager.S3Store(s3_endpoint=os.environ['S3_URL'], + access_key=os.environ['ACCESS_KEY'], + secret_key=os.environ['SECRET_KEY'], + use_https=use_https) + store.cleanup_storage(hwm=raw_hwm, lwm=raw_lwm) + + dbm = db_manager.DBmanager() + dbm.cleanup_db() if __name__ == "__main__": logger = initialize_logging() - run_minio_cleaner() + run_cleaner() + print('All Done.') diff --git a/cleanup/servicex_storage/__init__.py b/cleanup/servicex_storage/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cleanup/servicex_storage/db_manager.py b/cleanup/servicex_storage/db_manager.py new file mode 100644 index 000000000..154a23034 --- /dev/null +++ b/cleanup/servicex_storage/db_manager.py @@ -0,0 +1,159 @@ +import logging +import os +import psycopg2 +from typing import List +import datetime + +max_request_age = int(os.environ.get('MAX_REQUEST_AGE', '30')) +max_did_age = int(os.environ.get('MAX_DID_LOOKUP_AGE', '6')) +db_uri = os.environ.get('DATABASE_URI') + + +class Req(): + def __init__(self, req_id, submit_time, status, did_id): + self.req_id = req_id + self.submit_time = submit_time + self.status = status + self.did_id = did_id + + def __str__(self) -> str: + return f"req_id: {self.req_id} submit time: {self.submit_time} status:{self.status} did_id:{self.did_id}" + + +class DS(): + def __init__(self, id, last_used, last_updated, lookup_status): + self.ds_id = id + self.last_used = last_used + self.last_updated = last_updated + self.lookup_status = lookup_status + + def __str__(self) -> str: + return f"ds_id:{self.ds_id} last used:{self.last_used} last updated:{self.last_updated} status:{self.lookup_status}" + + +class DBmanager(): + + def __init__(self): + + self.logger = logging.getLogger(__name__) + self.logger.addHandler(logging.NullHandler()) + self.conn = psycopg2.connect(db_uri) + + def get_servicex_requests(self) -> List[Req]: + requests = [] + try: + cursor = self.conn.cursor() + cursor.execute( + "SELECT request_id, submit_time, status, did_id FROM requests ORDER BY submit_time ASC" + ) + rows = cursor.fetchall() + for row in rows: + # print(row) + requests.append(Req(row[0], row[1], row[2], row[3])) + cursor.close() + + except psycopg2.Error as e: + print(f"Error geting requests: {e}") + return requests + + def get_datasets(self) -> List[DS]: + datasets = [] + try: + cursor = self.conn.cursor() + cursor.execute( + "SELECT id, last_used, last_updated, lookup_status FROM datasets ORDER BY last_used ASC" + ) + rows = cursor.fetchall() + for row in rows: + # print(row) + datasets.append(DS(row[0], row[1], row[2], row[3])) + cursor.close() + + except psycopg2.Error as e: + print(f"Error geting requests: {e}") + return datasets + + def delete_transform_results_req_id(self, req_id): + try: + cursor = self.conn.cursor() + cursor.execute( + f"delete from transform_result where request_id='{req_id}'" + ) + self.conn.commit() + cursor.close() + self.logger.info(f'deleted transform_result related to {req_id}') + + except psycopg2.Error as e: + print(f"Error deleting transform_result for req_id: {req_id}.\n{e}") + self.conn.rollback() + + def delete_transform_results_file_id(self, file_id): + pass + + def delete_request(self, req): + + # first remove transform_result + self.delete_transform_results_req_id(req.req_id) + + try: + cursor = self.conn.cursor() + cursor.execute( + f"delete from requests where request_id='{req.req_id}'" + ) + self.conn.commit() + cursor.close() + self.logger.info(f'deleted request {req.req_id}') + + except psycopg2.Error as e: + print(f"Error deleting request: {req.req_id},\n{e}") + self.conn.rollback() + + def delete_files(self, dataset_id): + pass + + def delete_dataset(self, ds): + + # first remove transform_result + # self.delete_transform_results_req_id(req.req_id) + + try: + cursor = self.conn.cursor() + cursor.execute( + f"delete from datasets where id='{ds.ds_id}'" + ) + self.conn.commit() + cursor.close() + self.logger.info(f'deleted dataset {ds.ds_id}') + + except psycopg2.Error as e: + # print(f"Error deleting dataset: {ds.ds_id}.\n{e}") + self.conn.rollback() + + def cleanup_db(self): + """ + Clean up db + """ + + # get all the requests known to ServiceX + requests = self.get_servicex_requests() + self.logger.info('request in servicex db', extra={"requests": len(requests)}) + + # delete too old requests. + for req in requests: + req_age = datetime.datetime.now( + datetime.timezone.utc) - req.submit_time.replace(tzinfo=datetime.timezone.utc) + if req_age.days > max_request_age: + self.delete_request(req) + + # get all the datasets in ServiceX + datasets = self.get_datasets() + self.logger.info('datasets in servicex db', extra={"datasets": len(datasets)}) + + # delete datasets not used recently + for ds in datasets: + ds_age = datetime.datetime.now( + datetime.timezone.utc) - ds.last_used.replace(tzinfo=datetime.timezone.utc) + if ds_age.days > max_did_age: + self.delete_dataset(ds) + + self.conn.close() diff --git a/cleanup/servicex_storage/s3_storage_manager.py b/cleanup/servicex_storage/s3_storage_manager.py new file mode 100644 index 000000000..f82690a10 --- /dev/null +++ b/cleanup/servicex_storage/s3_storage_manager.py @@ -0,0 +1,195 @@ +import datetime +import logging +import os +import boto3 +import boto3.session +from botocore.client import ClientError +import psycopg2 + +max_bucket_age = int(os.environ.get('MAX_S3_AGE', '30')) +max_request_age = int(os.environ.get('MAX_REQUEST_AGE', '30')) +max_did_age = int(os.environ.get('MAX_DID_LOOKUP_AGE', '7')) +db_uri = os.environ.get('DATABASE_URI') + + +def toTiB(nbytes): + return float(nbytes)/1024/1024/1024/1024 + + +def toGiB(nbytes): + return float(nbytes)/1024/1024/1024 + + +class BucketInfo(): + def __init__(self, name, request_created, status): + self.name = name + self.last_modified = request_created + self.size = 0 + self.objects = 0 + self.to_delete = False + self.deleted = False + self.present = False + self.status = status + + def __str__(self) -> str: + return f"name:{self.name} toDelete:{self.to_delete} deleted:{self.deleted} lm: {self.last_modified} size: {self.size} objects: {self.objects} status: {self.status}" + + +class S3Store(): + + def __init__(self, + s3_endpoint: str, access_key: str, secret_key: str, use_https: bool = False): + + self.logger = logging.getLogger(__name__) + self.logger.addHandler(logging.NullHandler()) + + self.s3_endpoint = s3_endpoint + self.access_key = access_key + self.secret_key = secret_key + self.use_https = use_https + self.s3 = boto3.client( + service_name='s3', + endpoint_url=self.s3_endpoint, + aws_access_key_id=self.access_key, + aws_secret_access_key=self.secret_key, + verify=self.use_https + ) + self.s3_resource = boto3.resource( + 's3', + endpoint_url=self.s3_endpoint, + aws_access_key_id=self.access_key, + aws_secret_access_key=self.secret_key, + verify=self.use_https + ) + + def get_bucket_info(self, bucket) -> BucketInfo: + """ + Given a bucket, get the size and last modified date + :param bucket: bucket name, bucket last modified + """ + b = BucketInfo(bucket[0], bucket[1], bucket[2]) + try: + response = self.s3.head_bucket(Bucket=b.name) + # print(response) + if response['ResponseMetadata']['HTTPStatusCode'] == 200: + b.present = True + head = response['ResponseMetadata']['HTTPHeaders'] + b.size = int(head['x-rgw-bytes-used']) + b.objects = int(head['x-rgw-object-count']) + except ClientError: + pass + # print(f"no such bucket {b.name}") + + if not b.present: + return + + bucket_age = datetime.datetime.now( + datetime.timezone.utc) - b.last_modified.replace(tzinfo=datetime.timezone.utc) + # marking empty buckets for deletion + if b.size == 0 and bucket_age.days*86400 + bucket_age.seconds >= 7200: + b.to_delete = True + # marking old buckets for deletion + if bucket_age.days > max_bucket_age: + b.to_delete = True + # marking buckets from canceled requests for deletion + if b.status == 'canceled': + b.to_delete = True + + print(b) + return b + + def delete_bucket(self, bi: BucketInfo): + """ + Delete a given bucket and contents + """ + self.logger.info("deleting bucket", extra={"requestId": bi.name}) + bucket = self.s3_resource.Bucket(bi.name) + for s3_object in bucket.objects.all(): + s3_object.delete() + bucket.delete() + bi.deleted = True + + def bi_sums(self, bil, message): + total_buckets = 0 + total_objects = 0 + total_size = 0 + for bi in bil: + total_buckets += 1 + total_size += bi.size + total_objects += bi.objects + self.logger.info(message, extra={ + "buckets": total_buckets, + "objects": total_objects, + "size": total_size + }) + return total_size + + def get_servicex_requests(self): + requests = [] + try: + # Establish a connection to the database + conn = psycopg2.connect(db_uri) + cursor = conn.cursor() + cursor.execute("SELECT request_id, submit_time, status FROM requests") + rows = cursor.fetchall() + for row in rows: + # print(row) + requests.append([row[0], row[1], row[2]]) + cursor.close() + conn.close() + + except psycopg2.Error as e: + print(f"Error connecting to the database: {e}") + return requests + + def cleanup_storage(self, hwm: int, lwm: int): + """ + Clean up storage by removing old files until below max_size + :param hwm: max amount of storage that can be used before trying to clean up + :param lwm: when this size is achieved, stop removing files + """ + + # get all the requests known to ServiceX + buckets = self.get_servicex_requests() + + self.logger.info('request in servicex db', extra={"requests": len(buckets)}) + + buckets_info = [] + for b in buckets: + bi = self.get_bucket_info(b) + if bi: + buckets_info.append(bi) + + total_size = self.bi_sums(buckets_info, "before old cleanup.") + + buckets_info.sort(key=lambda x: x.last_modified) + + new_buckets_info = [] + for bi in buckets_info: + if bi.to_delete: + self.delete_bucket(bi) + else: + new_buckets_info.append(bi) + + total_size = self.bi_sums(new_buckets_info, "after old cleanup.") + + # marking for deletion in order to get to LWM + if total_size < hwm: + print('not above HWM') + else: + extra_space_needed = total_size-lwm + print(f"above HWM. need to free: {extra_space_needed} bytes.") + for bi in new_buckets_info: + if extra_space_needed < 0: + break + bi.to_delete = True + extra_space_needed -= bi.size + + buckets_info = [] + for bi in new_buckets_info: + if bi.to_delete: + self.delete_bucket(bi) + else: + buckets_info.append(bi) + + total_size = self.bi_sums(buckets_info, "after size cleanup.") diff --git a/minio_cleanup/Dockerfile b/minio_cleanup/Dockerfile deleted file mode 100644 index df3f7c4fa..000000000 --- a/minio_cleanup/Dockerfile +++ /dev/null @@ -1,29 +0,0 @@ -FROM python:3.11 AS builder - -RUN useradd -ms /bin/bash cleanup - -COPY pyproject.toml poetry.lock /home/cleanup/ -WORKDIR /home/cleanup - -FROM builder as poetry -ENV POETRY_HOME=/home/cleanup -ENV POETRY_VIRTUALENVS_IN_PROJECT=true -ENV PATH="$POETRY_HOME/bin:$PATH" -RUN python -c 'from urllib.request import urlopen; print(urlopen("https://install.python-poetry.org").read().decode())' | python - -COPY resources ./ -RUN poetry install --no-interaction --no-ansi -vvv - -FROM builder AS runtime - -COPY --from=poetry /home/cleanup /home/cleanup -WORKDIR /home/cleanup -RUN mkdir ./cleanup -COPY scripts/*.py resources/start.sh ./ -RUN mkdir servicex_storage -COPY servicex_storage/* ./servicex_storage/ - -RUN chmod +x start.sh - -USER cleanup - -ENTRYPOINT ["./start.sh"] diff --git a/minio_cleanup/README.md b/minio_cleanup/README.md deleted file mode 100644 index 3ee91ef65..000000000 --- a/minio_cleanup/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# storage_cleanup -Microservice to cleanup storage used by ServiceX diff --git a/minio_cleanup/poetry.lock b/minio_cleanup/poetry.lock deleted file mode 100644 index d25fa2501..000000000 --- a/minio_cleanup/poetry.lock +++ /dev/null @@ -1,255 +0,0 @@ -[[package]] -name = "argon2-cffi" -version = "23.1.0" -description = "Argon2 for Python" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -argon2-cffi-bindings = "*" - -[package.extras] -dev = ["argon2-cffi[tests,typing]", "tox (>4)"] -docs = ["furo", "myst-parser", "sphinx", "sphinx-copybutton", "sphinx-notfound-page"] -tests = ["hypothesis", "pytest"] -typing = ["mypy"] - -[[package]] -name = "argon2-cffi-bindings" -version = "21.2.0" -description = "Low-level CFFI bindings for Argon2" -category = "main" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -cffi = ">=1.0.1" - -[package.extras] -dev = ["cogapp", "pre-commit", "pytest", "wheel"] -tests = ["pytest"] - -[[package]] -name = "certifi" -version = "2023.11.17" -description = "Python package for providing Mozilla's CA Bundle." -category = "main" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "cffi" -version = "1.16.0" -description = "Foreign Function Interface for Python calling C code." -category = "main" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -pycparser = "*" - -[[package]] -name = "minio" -version = "7.2.3" -description = "MinIO Python SDK for Amazon S3 Compatible Cloud Storage" -category = "main" -optional = false -python-versions = "*" - -[package.dependencies] -argon2-cffi = "*" -certifi = "*" -pycryptodome = "*" -typing-extensions = "*" -urllib3 = "*" - -[[package]] -name = "pycparser" -version = "2.21" -description = "C parser in Python" -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - -[[package]] -name = "pycryptodome" -version = "3.20.0" -description = "Cryptographic library for Python" -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" - -[[package]] -name = "python-logstash" -version = "0.4.8" -description = "Python logging handler for Logstash." -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "typing-extensions" -version = "4.9.0" -description = "Backported and Experimental Type Hints for Python 3.8+" -category = "main" -optional = false -python-versions = ">=3.8" - -[[package]] -name = "urllib3" -version = "2.1.0" -description = "HTTP library with thread-safe connection pooling, file post, and more." -category = "main" -optional = false -python-versions = ">=3.8" - -[package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] -socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["zstandard (>=0.18.0)"] - -[metadata] -lock-version = "1.1" -python-versions = "^3.11" -content-hash = "0d75f663ca9e370b4d03d558cbd2b1ea96ea35a4aa321b446051502d6a2ff9af" - -[metadata.files] -argon2-cffi = [ - {file = "argon2_cffi-23.1.0-py3-none-any.whl", hash = "sha256:c670642b78ba29641818ab2e68bd4e6a78ba53b7eff7b4c3815ae16abf91c7ea"}, - {file = "argon2_cffi-23.1.0.tar.gz", hash = "sha256:879c3e79a2729ce768ebb7d36d4609e3a78a4ca2ec3a9f12286ca057e3d0db08"}, -] -argon2-cffi-bindings = [ - {file = "argon2-cffi-bindings-21.2.0.tar.gz", hash = "sha256:bb89ceffa6c791807d1305ceb77dbfacc5aa499891d2c55661c6459651fc39e3"}, - {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ccb949252cb2ab3a08c02024acb77cfb179492d5701c7cbdbfd776124d4d2367"}, - {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9524464572e12979364b7d600abf96181d3541da11e23ddf565a32e70bd4dc0d"}, - {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b746dba803a79238e925d9046a63aa26bf86ab2a2fe74ce6b009a1c3f5c8f2ae"}, - {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58ed19212051f49a523abb1dbe954337dc82d947fb6e5a0da60f7c8471a8476c"}, - {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:bd46088725ef7f58b5a1ef7ca06647ebaf0eb4baff7d1d0d177c6cc8744abd86"}, - {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-musllinux_1_1_i686.whl", hash = "sha256:8cd69c07dd875537a824deec19f978e0f2078fdda07fd5c42ac29668dda5f40f"}, - {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:f1152ac548bd5b8bcecfb0b0371f082037e47128653df2e8ba6e914d384f3c3e"}, - {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-win32.whl", hash = "sha256:603ca0aba86b1349b147cab91ae970c63118a0f30444d4bc80355937c950c082"}, - {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-win_amd64.whl", hash = "sha256:b2ef1c30440dbbcba7a5dc3e319408b59676e2e039e2ae11a8775ecf482b192f"}, - {file = "argon2_cffi_bindings-21.2.0-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:e415e3f62c8d124ee16018e491a009937f8cf7ebf5eb430ffc5de21b900dad93"}, - {file = "argon2_cffi_bindings-21.2.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:3e385d1c39c520c08b53d63300c3ecc28622f076f4c2b0e6d7e796e9f6502194"}, - {file = "argon2_cffi_bindings-21.2.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c3e3cc67fdb7d82c4718f19b4e7a87123caf8a93fde7e23cf66ac0337d3cb3f"}, - {file = "argon2_cffi_bindings-21.2.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a22ad9800121b71099d0fb0a65323810a15f2e292f2ba450810a7316e128ee5"}, - {file = "argon2_cffi_bindings-21.2.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f9f8b450ed0547e3d473fdc8612083fd08dd2120d6ac8f73828df9b7d45bb351"}, - {file = "argon2_cffi_bindings-21.2.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:93f9bf70084f97245ba10ee36575f0c3f1e7d7724d67d8e5b08e61787c320ed7"}, - {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:3b9ef65804859d335dc6b31582cad2c5166f0c3e7975f324d9ffaa34ee7e6583"}, - {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4966ef5848d820776f5f562a7d45fdd70c2f330c961d0d745b784034bd9f48d"}, - {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20ef543a89dee4db46a1a6e206cd015360e5a75822f76df533845c3cbaf72670"}, - {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed2937d286e2ad0cc79a7087d3c272832865f779430e0cc2b4f3718d3159b0cb"}, - {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:5e00316dabdaea0b2dd82d141cc66889ced0cdcbfa599e8b471cf22c620c329a"}, -] -certifi = [ - {file = "certifi-2023.11.17-py3-none-any.whl", hash = "sha256:e036ab49d5b79556f99cfc2d9320b34cfbe5be05c5871b51de9329f0603b0474"}, - {file = "certifi-2023.11.17.tar.gz", hash = "sha256:9b469f3a900bf28dc19b8cfbf8019bf47f7fdd1a65a1d4ffb98fc14166beb4d1"}, -] -cffi = [ - {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, - {file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614"}, - {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743"}, - {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d"}, - {file = "cffi-1.16.0-cp310-cp310-win32.whl", hash = "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a"}, - {file = "cffi-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1"}, - {file = "cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404"}, - {file = "cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e"}, - {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc"}, - {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"}, - {file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"}, - {file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"}, - {file = "cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"}, - {file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"}, - {file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"}, - {file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"}, - {file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"}, - {file = "cffi-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324"}, - {file = "cffi-1.16.0-cp38-cp38-win32.whl", hash = "sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a"}, - {file = "cffi-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36"}, - {file = "cffi-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed"}, - {file = "cffi-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098"}, - {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000"}, - {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe"}, - {file = "cffi-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4"}, - {file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"}, - {file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"}, -] -minio = [ - {file = "minio-7.2.3-py3-none-any.whl", hash = "sha256:e6b5ce0a9b4368da50118c3f0c4df5dbf33885d44d77fce6c0aa1c485e6af7a1"}, - {file = "minio-7.2.3.tar.gz", hash = "sha256:4971dfb1a71eeefd38e1ce2dc7edc4e6eb0f07f1c1d6d70c15457e3280cfc4b9"}, -] -pycparser = [ - {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, - {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, -] -pycryptodome = [ - {file = "pycryptodome-3.20.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:f0e6d631bae3f231d3634f91ae4da7a960f7ff87f2865b2d2b831af1dfb04e9a"}, - {file = "pycryptodome-3.20.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:baee115a9ba6c5d2709a1e88ffe62b73ecc044852a925dcb67713a288c4ec70f"}, - {file = "pycryptodome-3.20.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:417a276aaa9cb3be91f9014e9d18d10e840a7a9b9a9be64a42f553c5b50b4d1d"}, - {file = "pycryptodome-3.20.0-cp27-cp27m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a1250b7ea809f752b68e3e6f3fd946b5939a52eaeea18c73bdab53e9ba3c2dd"}, - {file = "pycryptodome-3.20.0-cp27-cp27m-musllinux_1_1_aarch64.whl", hash = "sha256:d5954acfe9e00bc83ed9f5cb082ed22c592fbbef86dc48b907238be64ead5c33"}, - {file = "pycryptodome-3.20.0-cp27-cp27m-win32.whl", hash = "sha256:06d6de87c19f967f03b4cf9b34e538ef46e99a337e9a61a77dbe44b2cbcf0690"}, - {file = "pycryptodome-3.20.0-cp27-cp27m-win_amd64.whl", hash = "sha256:ec0bb1188c1d13426039af8ffcb4dbe3aad1d7680c35a62d8eaf2a529b5d3d4f"}, - {file = "pycryptodome-3.20.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:5601c934c498cd267640b57569e73793cb9a83506f7c73a8ec57a516f5b0b091"}, - {file = "pycryptodome-3.20.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d29daa681517f4bc318cd8a23af87e1f2a7bad2fe361e8aa29c77d652a065de4"}, - {file = "pycryptodome-3.20.0-cp27-cp27mu-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3427d9e5310af6680678f4cce149f54e0bb4af60101c7f2c16fdf878b39ccccc"}, - {file = "pycryptodome-3.20.0-cp27-cp27mu-musllinux_1_1_aarch64.whl", hash = "sha256:3cd3ef3aee1079ae44afaeee13393cf68b1058f70576b11439483e34f93cf818"}, - {file = "pycryptodome-3.20.0-cp35-abi3-macosx_10_9_universal2.whl", hash = "sha256:ac1c7c0624a862f2e53438a15c9259d1655325fc2ec4392e66dc46cdae24d044"}, - {file = "pycryptodome-3.20.0-cp35-abi3-macosx_10_9_x86_64.whl", hash = "sha256:76658f0d942051d12a9bd08ca1b6b34fd762a8ee4240984f7c06ddfb55eaf15a"}, - {file = "pycryptodome-3.20.0-cp35-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f35d6cee81fa145333137009d9c8ba90951d7d77b67c79cbe5f03c7eb74d8fe2"}, - {file = "pycryptodome-3.20.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76cb39afede7055127e35a444c1c041d2e8d2f1f9c121ecef573757ba4cd2c3c"}, - {file = "pycryptodome-3.20.0-cp35-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49a4c4dc60b78ec41d2afa392491d788c2e06edf48580fbfb0dd0f828af49d25"}, - {file = "pycryptodome-3.20.0-cp35-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:fb3b87461fa35afa19c971b0a2b7456a7b1db7b4eba9a8424666104925b78128"}, - {file = "pycryptodome-3.20.0-cp35-abi3-musllinux_1_1_i686.whl", hash = "sha256:acc2614e2e5346a4a4eab6e199203034924313626f9620b7b4b38e9ad74b7e0c"}, - {file = "pycryptodome-3.20.0-cp35-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:210ba1b647837bfc42dd5a813cdecb5b86193ae11a3f5d972b9a0ae2c7e9e4b4"}, - {file = "pycryptodome-3.20.0-cp35-abi3-win32.whl", hash = "sha256:8d6b98d0d83d21fb757a182d52940d028564efe8147baa9ce0f38d057104ae72"}, - {file = "pycryptodome-3.20.0-cp35-abi3-win_amd64.whl", hash = "sha256:9b3ae153c89a480a0ec402e23db8d8d84a3833b65fa4b15b81b83be9d637aab9"}, - {file = "pycryptodome-3.20.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:4401564ebf37dfde45d096974c7a159b52eeabd9969135f0426907db367a652a"}, - {file = "pycryptodome-3.20.0-pp27-pypy_73-win32.whl", hash = "sha256:ec1f93feb3bb93380ab0ebf8b859e8e5678c0f010d2d78367cf6bc30bfeb148e"}, - {file = "pycryptodome-3.20.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:acae12b9ede49f38eb0ef76fdec2df2e94aad85ae46ec85be3648a57f0a7db04"}, - {file = "pycryptodome-3.20.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f47888542a0633baff535a04726948e876bf1ed880fddb7c10a736fa99146ab3"}, - {file = "pycryptodome-3.20.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e0e4a987d38cfc2e71b4a1b591bae4891eeabe5fa0f56154f576e26287bfdea"}, - {file = "pycryptodome-3.20.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c18b381553638414b38705f07d1ef0a7cf301bc78a5f9bc17a957eb19446834b"}, - {file = "pycryptodome-3.20.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a60fedd2b37b4cb11ccb5d0399efe26db9e0dd149016c1cc6c8161974ceac2d6"}, - {file = "pycryptodome-3.20.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:405002eafad114a2f9a930f5db65feef7b53c4784495dd8758069b89baf68eab"}, - {file = "pycryptodome-3.20.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ab6ab0cb755154ad14e507d1df72de9897e99fd2d4922851a276ccc14f4f1a5"}, - {file = "pycryptodome-3.20.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:acf6e43fa75aca2d33e93409f2dafe386fe051818ee79ee8a3e21de9caa2ac9e"}, - {file = "pycryptodome-3.20.0.tar.gz", hash = "sha256:09609209ed7de61c2b560cc5c8c4fbf892f8b15b1faf7e4cbffac97db1fffda7"}, -] -python-logstash = [ - {file = "python-logstash-0.4.8.tar.gz", hash = "sha256:d04e1ce11ecc107e4a4f3b807fc57d96811e964a554081b3bbb44732f74ef5f9"}, -] -typing-extensions = [ - {file = "typing_extensions-4.9.0-py3-none-any.whl", hash = "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"}, - {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"}, -] -urllib3 = [ - {file = "urllib3-2.1.0-py3-none-any.whl", hash = "sha256:55901e917a5896a349ff771be919f8bd99aff50b79fe58fec595eb37bbc56bb3"}, - {file = "urllib3-2.1.0.tar.gz", hash = "sha256:df7aa8afb0148fa78488e7899b2c59b5f4ffcfa82e6c54ccb9dd37c1d7b52d54"}, -] diff --git a/minio_cleanup/pyproject.toml b/minio_cleanup/pyproject.toml deleted file mode 100644 index 597ce4d57..000000000 --- a/minio_cleanup/pyproject.toml +++ /dev/null @@ -1,17 +0,0 @@ -[tool.poetry] -name = "minio_cleanup" -version = "0.1.0" -description = "" -authors = ["Suchandra Thapa "] - -[tool.poetry.dependencies] -python = "^3.11" -minio = "^7.1.12" -python-logstash = "^0.4.8" -urllib3 = "^2.0.7" - -[tool.poetry.dev-dependencies] - -[build-system] -requires = ["poetry-core>=1.0.0"] -build-backend = "poetry.core.masonry.api" diff --git a/minio_cleanup/requirements.txt b/minio_cleanup/requirements.txt deleted file mode 100644 index 9e347f602..000000000 --- a/minio_cleanup/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -minio -servicex_storage \ No newline at end of file diff --git a/minio_cleanup/resources/cleanup_batch.yaml b/minio_cleanup/resources/cleanup_batch.yaml deleted file mode 100644 index e70a42110..000000000 --- a/minio_cleanup/resources/cleanup_batch.yaml +++ /dev/null @@ -1,28 +0,0 @@ -apiVersion: batch/v1 -kind: CronJob -metadata: - name: minio-cleanup -spec: - schedule: "* */8 * * *" - jobTemplate: - spec: - template: - spec: - containers: - - name: minio-cleanup - image: cleanup:0.1 - imagePullPolicy: IfNotPresent - env: - - name: MINIO_URL - value: test - - name: ACCESS_KEY - value: access - - name: SECRET_KEY - value: secret - - name: MAX_AGE - value: 30 - - name: MAX_SIZE - value: '1G' - - name: NORM_SIZE - value: '700M' - restartPolicy: OnFailure \ No newline at end of file diff --git a/minio_cleanup/resources/start.sh b/minio_cleanup/resources/start.sh deleted file mode 100644 index e016de373..000000000 --- a/minio_cleanup/resources/start.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh -PATH=.venv/bin:$PATH -. .venv/bin/activate -env -python3.11 ./minio_cleanup.py --max-size $MAX_SIZE --norm-size $NORM_SIZE --max-age $MAX_AGE \ No newline at end of file diff --git a/minio_cleanup/servicex_storage/__init__.py b/minio_cleanup/servicex_storage/__init__.py deleted file mode 100644 index 794b0f111..000000000 --- a/minio_cleanup/servicex_storage/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) 2021, IRIS-HEP -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/minio_cleanup/servicex_storage/object_storage_manager.py b/minio_cleanup/servicex_storage/object_storage_manager.py deleted file mode 100644 index 900364eee..000000000 --- a/minio_cleanup/servicex_storage/object_storage_manager.py +++ /dev/null @@ -1,119 +0,0 @@ -""" -Definition for abstract Object storage manager class -""" - - -# Copyright (c) 2019, IRIS-HEP -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import abc -import pathlib -from typing import List -from typing import Tuple - - -class ObjectStore(abc.ABC): - """ - Abstract class for object storage managers to use - """ - @abc.abstractmethod - def get_storage_used(self) -> int: - """ - Get storage used by object store - :return: total storage used in bytes - """ - - @abc.abstractmethod - def upload_file(self, bucket: str, object_name: str, path: pathlib.Path) -> None: - """ - Save file to object store - :param bucket: name of bucket - :param object_name: name of object - :param path: path to source file - :return: None - """ - - @abc.abstractmethod - def cleanup_storage(self, - max_size: int, norm_size: int, max_age: int) -> Tuple[int, List[str]]: - """ - Reduce storage used until it's less than max_size - :param max_size: Maximum amount of storage to use before trying to clean up - :param norm_size: when this size is achieved, stop removing files - :param max_age: Maximum number of days a bucket can be before it is cleaned up - :return: Tuple with final storage used and list of buckets removed - """ - - @abc.abstractmethod - def delete_object(self, bucket: str, object_name: str) -> None: - """ - Delete object from store - :param bucket: name of bucket - :param object_name: name of object - :return: None - """ - - @abc.abstractmethod - def delete_objects(self, bucket: str, object_names: List[str]) -> List[Tuple[str, str]]: - """ - Delete object from store - :param bucket: name of bucket - :param object_names: name of object - :return: List of booleans indicating whether each corresponding object was deleted - """ - - @abc.abstractmethod - def get_file(self, bucket: str, object_name: str, path: pathlib.Path) -> None: - """ - Get an object from store - :param bucket: name of bucket - :param object_name: name of object - :param path: path to destination file (must not be present) - :return: None - """ - - @abc.abstractmethod - def get_buckets(self) -> List[str]: - """ - Get an list of buckets from store - :return: List of buckets - """ - - @abc.abstractmethod - def create_bucket(self, bucket: str) -> bool: - """ - Create a bucket with given id - :return: True on success, False otherwise - """ - - @abc.abstractmethod - def delete_bucket(self, bucket: str) -> bool: - """ - Get delete bucket from store - :return: True on success, False otherwise - """ diff --git a/minio_cleanup/servicex_storage/s3_storage_manager.py b/minio_cleanup/servicex_storage/s3_storage_manager.py deleted file mode 100644 index 092cbd510..000000000 --- a/minio_cleanup/servicex_storage/s3_storage_manager.py +++ /dev/null @@ -1,235 +0,0 @@ -""" -Implementation of storage manager for minio based storage -""" - -import datetime -import logging -import os -import pathlib -import concurrent.futures -from typing import List -from typing import Tuple -from collections import namedtuple - -import minio -from minio.deleteobjects import DeleteObject - -from servicex_storage import object_storage_manager - -BucketInfo = namedtuple('BucketInfo', ['name', 'size', 'last_modified']) - - -class S3Store(object_storage_manager.ObjectStore): - """ - Class to handle operations for minio storage - """ - - def __init__(self, - s3_endpoint: str, access_key: str, secret_key: str, use_https: bool = False): - super().__init__() - - self.logger = logging.getLogger(__name__) - self.logger.addHandler(logging.NullHandler()) - - self.s3_endpoint = s3_endpoint - self.access_key = access_key - self.secret_key = secret_key - - # s3 client is thread safe using Threading, not so much with multiprocessing - self.__s3_client = minio.Minio(self.s3_endpoint, - access_key=self.access_key, - secret_key=self.secret_key, - secure=use_https) - - # set up threads to use - if "THREADS" in os.environ: - try: - self.__threads = int(os.environ["THREADS"]) - self.logger.debug("Using %d threads for storage cleanup", self.__threads) - except ValueError: - self.logger.exception("THREADS env variable not a number, using a single thread") - self.__threads = 1 - else: - self.__threads = 1 - - def get_bucket_info(self, bucket: str) -> BucketInfo: - """ - Given a bucket, get the size and last modified date - :param bucket: bucket name - :return: None - """ - - objects = self.__s3_client.list_objects(bucket) - size = 0 - last_modified = datetime.datetime.now(datetime.timezone.utc) - for obj in objects: - if obj.object_name[-1] == '/': - # this is a bucket within a bucket, skip because this isn't generated - # by ServiceX - continue - result = self.__s3_client.stat_object(obj.bucket_name, obj.object_name) - size += result.size - if result.last_modified < last_modified: - last_modified = result.last_modified - return BucketInfo(name=bucket, size=size, last_modified=last_modified) - - def delete_bucket(self, bucket: str) -> bool: - """ - Delete a given bucket and contents from minio - :param bucket: bucket name - :return: None - """ - if not self.__s3_client.bucket_exists(bucket): - return True - delete_objects = map(lambda x: DeleteObject(x.object_name), - self.__s3_client.list_objects(bucket)) - errors = 0 - for error in self.__s3_client.remove_objects(bucket, delete_objects): - errors += 1 - if errors != 0: - return False - self.__s3_client.remove_bucket(bucket) - return True - - def get_storage_used(self) -> int: - """ - Get the number of bytes used - - :return: integer with number of bytes used - """ - buckets = self.__s3_client.list_buckets() - if len(buckets) == 0: - return 0 - - # must use ThreadPool since minio client is thread safe with threading only - with concurrent.futures.ThreadPoolExecutor(max_workers=self.__threads) as executor: - sizes = executor.map(lambda x: self.get_bucket_info(x).size, buckets) - total_size = sum(sizes) - return total_size - - def delete_object(self, bucket: str, object_name: str) -> None: - """ - Remove object from minio storage - :param bucket: name of bucket - :param object_name: name of object - :return: None - """ - self.__s3_client.remove_object(bucket, object_name) - - def delete_objects(self, bucket: str, object_names: List[str]) -> List[Tuple[str, str]]: - """ - Delete object from store - :param bucket: name of bucket - :param object_names: name of object - :return: List of tuples (objectName, error_message) - """ - delete_objects = [DeleteObject(x) for x in object_names] - delete_results = self.__s3_client.remove_objects(bucket, delete_objects) - return [(x.name, x.message) for x in delete_results] - - def get_file(self, bucket: str, object_name: str, path: pathlib.Path) -> None: - """ - Get object from minio and save to given path - :param bucket: bucket name - :param object_name: object name - :param path: path to save - :return: None - """ - try: - resp = self.__s3_client.fget_object(bucket, object_name, path) - except Exception: # pylint: disable=broad-except - self.logger.exception("Got an exception while getting object") - finally: - resp.close() # pylint: disable=no-member - resp.release_conn() # pylint: disable=no-member - - def upload_file(self, bucket: str, object_name: str, path: pathlib.Path) -> None: - """ - Upload file to minio storage - :param bucket: bucket name - :param object_name: destination object name - :param path: path of file source - :return: None - """ - if not os.path.isfile(path): - mesg = f"Can't upload {path}: not present or not a file" - self.logger.error(mesg) - raise IOError(mesg) - self.__s3_client.fput_object(bucket, object_name, path) - - def cleanup_storage(self, - max_size: int, norm_size: int, max_age: int) -> Tuple[int, List[str]]: - """ - Clean up storage by removing old files until below max_size - :param max_size: max amount of storage that can be used before trying to clean up - :param norm_size: when this size is achieved, stop removing files - :param max_age: max number of days a bucket can be before it is deleted - :return: Tuple with final size of storage used and list of buckets removed - """ - buckets = map(lambda x: x.name, self.__s3_client.list_buckets()) - cleaned_buckets = [] - # must use ThreadPool since minio client is thread safe with threading only - with concurrent.futures.ThreadPoolExecutor(max_workers=self.__threads) as executor: - bucket_list = executor.map(self.get_bucket_info, buckets) - - # concurrently delete any old buckets - with concurrent.futures.ThreadPoolExecutor(max_workers=self.__threads) as executor: - kept_buckets = [] - old_buckets = [] - for bucket in bucket_list: - bucket_age = (datetime.datetime.now( - datetime.timezone.utc) - bucket.last_modified).days - if bucket_age > max_age: - old_buckets.append((bucket.name, bucket_age)) - else: - kept_buckets.append(bucket) - - futures = { - executor.submit(lambda x: self.delete_bucket(x), bucket[0]): (bucket[0], bucket[1]) - for bucket in old_buckets - } - for future in concurrent.futures.as_completed(futures): - bucket_info = futures[future] - try: - deleted = future.result() - # use mesg in both log outputs with different capitalizations of D - mesg = f"eleting {bucket_info[0]} due to age: {bucket_info[1]} days" - if deleted: - self.logger.info("D%s", mesg) - cleaned_buckets.append(bucket_info[0]) - else: - self.logger.error("Error d%s", mesg) - except Exception: # pylint: disable=broad-except - self.logger.exception( - "Received exception while deleting %s due to age", bucket_info[0]) - - kept_buckets.sort(key=lambda x: x.last_modified) - idx = 0 - current_size = sum(map(lambda x: x.size, kept_buckets)) - if current_size > max_size: - while current_size > norm_size and idx < len(kept_buckets): - bucket = kept_buckets[idx] - self.logger.info("Deleting %s due to storage limits", bucket.name) - self.delete_bucket(bucket.name) - cleaned_buckets.append(bucket.name) - current_size -= bucket.size - idx += 1 - return current_size, cleaned_buckets - - def get_buckets(self) -> List[str]: - """ - Get list of buckets in s3 - :return: list of bucket names - """ - return [x.name for x in self.__s3_client.list_buckets()] - - def create_bucket(self, bucket: str) -> bool: - """ - Create a bucket with given id - :return: None - """ - try: - self.__s3_client.make_bucket(bucket) - return True - except: - return False diff --git a/minio_cleanup/tests/test_minio_storage_manager.py b/minio_cleanup/tests/test_minio_storage_manager.py deleted file mode 100644 index 083075e4b..000000000 --- a/minio_cleanup/tests/test_minio_storage_manager.py +++ /dev/null @@ -1,117 +0,0 @@ -import datetime -import unittest -from collections import namedtuple - -from unittest.mock import patch - -import servicex_storage.s3_storage_manager - -ObjectInfo = namedtuple('ObjectInfo', ['size', 'last_modified']) -s3_fake_objects = { - "bucket1": { - "object1": ObjectInfo(size=10, - last_modified=datetime.datetime(year=2021, month=10, day=1, hour=10, minute=10, second=10)), - "object2": ObjectInfo(size=20, - last_modified=datetime.datetime(year=2021, month=10, day=1, hour=10, minute=11, second=10)), - "object3": ObjectInfo(size=30, - last_modified=datetime.datetime(year=2021, month=10, day=1, hour=10, minute=12, second=10)), - }, - "bucket2": { - "object4": ObjectInfo(size=100, - last_modified=datetime.datetime(year=2020, month=10, day=1, hour=10, minute=10, second=10)), - "object5": ObjectInfo(size=200, - last_modified=datetime.datetime(year=2020, month=10, day=1, hour=10, minute=11, second=10)), - "object6": ObjectInfo(size=300, - last_modified=datetime.datetime(year=2020, month=10, day=1, hour=10, minute=12, second=10)), - } -} - - -class MyTestCase(unittest.TestCase): - @patch('minio.Minio') - def test_s3_get_bucket_info(self, mock_class): - """ - Test s3's get bucket info - :return: None - """ - - mock_class().list_objects.return_value = list(s3_fake_objects["bucket1"].keys()) - mock_class().stat_object.side_effect = list(s3_fake_objects["bucket1"].values()) - return_value = servicex_storage.s3_storage_manager.BucketInfo(name="bucket1", - size=60, - last_modified=datetime.datetime( - year=2021, month=10, - day=1, hour=10, - minute=10, second=10)) - test_obj = servicex_storage.s3_storage_manager.S3Store(s3_endpoint="abc", - access_key="abc", - secret_key="abc") - bucket_info = test_obj.get_bucket_info("bucket1") - self.assertEqual(bucket_info, return_value) - - @patch('minio.Minio') - def test_minio_get_storage_used(self, mock_class): - """ - Test getting storage used by a s3 bucket - :return: None - """ - mock_class().list_buckets.return_value = list(s3_fake_objects.keys()) - mock_class().list_objects.side_effect = [list(s3_fake_objects["bucket1"].keys()), - list(s3_fake_objects["bucket2"].keys())] - mock_class().stat_object.side_effect = list(s3_fake_objects["bucket1"].values()) + \ - list(s3_fake_objects["bucket2"].values()) - - test_obj = servicex_storage.s3_storage_manager.S3Store(s3_endpoint="abc", - access_key="abc", - secret_key="abc") - - bucket_size = test_obj.get_storage_used() - self.assertEqual(bucket_size, 660) - - @patch('minio.Minio') - def test_s3_cleanup_storage(self, mock_class): - """ - Test minio's get bucket info - :return: None - """ - current_s3_fake_objects = { - "bucket1": { - "object1": ObjectInfo(size=10, - last_modified=datetime.datetime.utcnow()), - "object2": ObjectInfo(size=20, - last_modified=datetime.datetime.utcnow()), - "object3": ObjectInfo(size=30, - last_modified=datetime.datetime.utcnow()), - }, - "bucket2": { - "object4": ObjectInfo(size=100, - last_modified=datetime.datetime(year=2020, month=10, day=1, hour=10, minute=10, - second=10)), - "object5": ObjectInfo(size=200, - last_modified=datetime.datetime(year=2020, month=10, day=1, hour=10, minute=11, - second=10)), - "object6": ObjectInfo(size=300, - last_modified=datetime.datetime(year=2020, month=10, day=1, hour=10, minute=12, - second=10)), - } - } - - mock_class().list_buckets.return_value = list(current_s3_fake_objects.keys()) - mock_class().list_objects.side_effect = [list(current_s3_fake_objects["bucket1"].keys()), - list(current_s3_fake_objects["bucket2"].keys()), - list(current_s3_fake_objects["bucket2"].keys())] - mock_class().stat_object.side_effect = list(current_s3_fake_objects["bucket1"].values()) + \ - list(current_s3_fake_objects["bucket2"].values()) - - test_obj = servicex_storage.s3_storage_manager.S3Store(s3_endpoint="abc", - access_key="abc", - secret_key="abc") - - final_size = test_obj.cleanup_storage(70, 60, 365)[0] - self.assertEqual(final_size, 60) - mock_class().remove_objects.assert_called_with( - "bucket2", ["object4", "object5", "object6"]) - - -if __name__ == '__main__': - unittest.main()