diff --git a/HISTORY.md b/HISTORY.md index b39bee8c..f80fdb01 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -3,6 +3,7 @@ ## Unreleased - Fixed `CloudPath(...) / other` to correctly attempt to fall back on `other`'s `__rtruediv__` implementation, in order to support classes that explicitly support the `/` with a `CloudPath` instance. Previously, this would always raise a `TypeError` if `other` were not a `str` or `PurePosixPath`. (PR [#479](https://github.com/drivendataorg/cloudpathlib/pull/479)) +- Added `md5` property to `GSPath`, updated `LocalGSPath` to include `md5` property, and updated `mock_gs.MockBlob` to include `md5_hash` property. ## v0.20.0 (2024-10-18) diff --git a/README.md b/README.md index c421ee60..2013f74e 100644 --- a/README.md +++ b/README.md @@ -205,7 +205,7 @@ Most methods and properties from `pathlib.Path` are supported except for the one | `bucket` | ❌ | ✅ | ✅ | | `container` | ✅ | ❌ | ❌ | | `key` | ❌ | ✅ | ❌ | -| `md5` | ✅ | ❌ | ❌ | +| `md5` | ✅ | ❌ | ✅ | ---- diff --git a/cloudpathlib/gs/gsclient.py b/cloudpathlib/gs/gsclient.py index edd5b88a..c66e661d 100644 --- a/cloudpathlib/gs/gsclient.py +++ b/cloudpathlib/gs/gsclient.py @@ -121,6 +121,7 @@ def _get_metadata(self, cloud_path: GSPath) -> Optional[Dict[str, Any]]: "size": blob.size, "updated": blob.updated, "content_type": blob.content_type, + "md5_hash": blob.md5_hash, } def _download_file(self, cloud_path: GSPath, local_path: Union[str, os.PathLike]) -> Path: diff --git a/cloudpathlib/gs/gspath.py b/cloudpathlib/gs/gspath.py index bf085a78..a651a411 100644 --- a/cloudpathlib/gs/gspath.py +++ b/cloudpathlib/gs/gspath.py @@ -1,7 +1,7 @@ import os from pathlib import Path from tempfile import TemporaryDirectory -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Optional from ..cloudpath import CloudPath, NoStatError, register_path_class @@ -95,3 +95,10 @@ def blob(self) -> str: @property def etag(self): return self.client._get_metadata(self).get("etag") + + @property + def 
md5(self) -> Optional[str]: + meta = self.client._get_metadata(self) + if not meta: + return None + return meta.get("md5_hash", None) diff --git a/cloudpathlib/local/implementations/gs.py b/cloudpathlib/local/implementations/gs.py index 5e63084f..a5673c0c 100644 --- a/cloudpathlib/local/implementations/gs.py +++ b/cloudpathlib/local/implementations/gs.py @@ -53,6 +53,10 @@ def blob(self) -> str: def etag(self): return self.client._md5(self) + @property + def md5(self) -> str: + return self.client._md5(self) + LocalGSPath.__name__ = "GSPath" diff --git a/tests/mock_clients/mock_gs.py b/tests/mock_clients/mock_gs.py index 48aae606..e5763652 100644 --- a/tests/mock_clients/mock_gs.py +++ b/tests/mock_clients/mock_gs.py @@ -1,4 +1,5 @@ from datetime import datetime, timedelta +import os from pathlib import Path, PurePosixPath import shutil from tempfile import TemporaryDirectory @@ -95,6 +96,10 @@ def upload_from_filename(self, filename, content_type=None): def etag(self): return "etag" + @property + def md5_hash(self): + return os.environ.get("MOCK_EXPECTED_MD5_HASH", "md5_hash") + @property def size(self): path = self.bucket / self.name diff --git a/tests/test_gs_specific.py b/tests/test_gs_specific.py index 1632acae..80f468d2 100644 --- a/tests/test_gs_specific.py +++ b/tests/test_gs_specific.py @@ -49,3 +49,19 @@ def test_as_url(gs_rig): assert "X-Goog-Date" in query_params assert "X-Goog-SignedHeaders" in query_params assert "X-Goog-Signature" in query_params + + +@pytest.mark.parametrize( + "content, expected_hash", + [ + ("hello world", "5eb63bbbe01eeed093cb22bb8f5acdc3"), + ("another test case", "4f8182cd9856777ebe3c4f5dc58dacea"), + ], +) +def test_md5_property(content, expected_hash, gs_rig, monkeypatch): + # if USE_LIVE_CLOUD is set, this doesn't have any effect + monkeypatch.setenv("MOCK_EXPECTED_MD5_HASH", expected_hash) + + p: GSPath = gs_rig.create_cloud_path("dir_0/file0_0.txt") + p.write_text(content) + assert p.md5 == expected_hash