From cbaa0fd2854585d0b602cc147a467238036afd60 Mon Sep 17 00:00:00 2001 From: David Irvine Date: Mon, 18 Nov 2024 22:25:17 +0000 Subject: [PATCH] feat(docs): Add comprehensive Python documentation and docstrings This commit enhances the Python bindings documentation and usability: Core Changes: - Add detailed module-level documentation to self_encryption/__init__.py - Add comprehensive docstrings to all Python-exposed classes and functions - Document all parameters, return types, and exceptions - Add examples in docstrings for common use cases Documentation Improvements: - Add overview of library features and capabilities - Document all public APIs with type hints and descriptions - Add usage examples for basic and advanced features - Include detailed explanations of key concepts - Add cross-references between related functionality Python Bindings: - Add docstrings to PyDataMap, PyEncryptedChunk, and PyXorName classes - Document all class methods and attributes - Add parameter and return type documentation - Include exception information in docstrings - Add examples for common operations The changes ensure that Python users can get comprehensive help using the built-in help() function, making the library more accessible and easier to use. Documentation follows Python conventions and provides clear, practical examples for all functionality. --- Cargo.toml | 2 +- self_encryption/__init__.py | 54 +++++++++++++++ src/python.rs | 135 ++++++++++++++++++++++++++++++++++++ 3 files changed, 190 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index cc8bb65e0..9251875e9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ license = "GPL-3.0" name = "self_encryption" readme = "README.md" repository = "https://github.com/maidsafe/self_encryption" -version = "0.32.0" +version = "0.32.1" [features] default = [] diff --git a/self_encryption/__init__.py b/self_encryption/__init__.py index ebeb9b6ae..54e4fff7b 100644 --- a/self_encryption/__init__.py +++ b/self_encryption/__init__.py @@ -1,3 +1,57 @@ +""" +self_encryption - A convergent encryption library with obfuscation + +This library provides a secure way to encrypt data that supports deduplication while +maintaining strong security through content obfuscation and chunk interdependencies. + +Key Features: + - Content-based chunking for deduplication + - Convergent encryption with obfuscation + - Self-validating chunks through content hashing + - Streaming operations for large files + - Parallel chunk processing + - Both in-memory and file-based operations + +Basic Usage: + >>> from self_encryption import encrypt, decrypt + >>> data = b"Hello, World!" * 1000 # Must be at least 3072 bytes + >>> data_map, chunks = encrypt(data) + >>> decrypted = decrypt(data_map, chunks) + >>> assert data == decrypted + +File Operations: + >>> from pathlib import Path + >>> from self_encryption import encrypt_from_file, decrypt_from_storage + >>> data_map, chunk_names = encrypt_from_file("input.dat", "chunks/") + >>> def get_chunk(hash_hex): + ... return (Path("chunks") / hash_hex).read_bytes() + >>> decrypt_from_storage(data_map, "output.dat", get_chunk) + +Advanced Features: + - Hierarchical data maps for large files + - Streaming decryption with parallel chunk retrieval + - Chunk verification and validation + - XorName operations for content addressing + +Classes: + DataMap - Contains metadata about encrypted chunks + EncryptedChunk - Represents an encrypted chunk of data + XorName - Content-addressed names for chunks + +Functions: + encrypt(data: bytes) -> Tuple[DataMap, List[EncryptedChunk]] + encrypt_from_file(input_path: str, output_dir: str) -> Tuple[DataMap, List[str]] + decrypt(data_map: DataMap, chunks: List[EncryptedChunk]) -> bytes + decrypt_from_storage(data_map: DataMap, output_path: str, get_chunk: Callable) -> None + shrink_data_map(data_map: DataMap, store_chunk: Callable) -> Tuple[DataMap, List[EncryptedChunk]] + streaming_decrypt_from_storage(data_map: DataMap, output_path: str, get_chunks: Callable) -> None + verify_chunk(name: XorName, content: bytes) -> EncryptedChunk + +For more information about specific functions or classes, use help() on the individual items: + >>> help(self_encryption.DataMap) + >>> help(self_encryption.encrypt) +""" + from ._self_encryption import ( DataMap, EncryptedChunk, diff --git a/src/python.rs b/src/python.rs index ee01b4d31..1f53164ec 100644 --- a/src/python.rs +++ b/src/python.rs @@ -1,3 +1,4 @@ +/// Python bindings for self-encryption functionality. use crate::{ decrypt as rust_decrypt, decrypt_from_storage as rust_decrypt_from_storage, encrypt as rust_encrypt, encrypt_from_file as rust_encrypt_from_file, @@ -12,12 +13,31 @@ use std::path::PathBuf; use xor_name::XorName; #[pyclass(name = "DataMap")] +/// A data map containing information about encrypted chunks. +/// +/// The DataMap contains metadata about how a file was split and encrypted into chunks, +/// including the hashes needed to verify and decrypt the chunks. +/// +/// Attributes: +/// child (Optional[int]): The child level of this data map, if it's part of a hierarchy +/// len (int): The number of chunks in this data map +/// +/// Methods: +/// is_child() -> bool: Check if this is a child data map +/// infos() -> List[Tuple[int, bytes, bytes, int]]: Get chunk information #[derive(Clone)] struct PyDataMap { inner: RustDataMap, } #[pyclass(name = "EncryptedChunk")] +/// An encrypted chunk of data. +/// +/// Represents a single encrypted chunk of data that was created during the encryption process. +/// +/// Methods: +/// content() -> bytes: Get the encrypted content of this chunk +/// from_bytes(content: bytes) -> EncryptedChunk: Create a new chunk from bytes #[derive(Clone)] struct PyEncryptedChunk { inner: RustEncryptedChunk, @@ -32,6 +52,13 @@ struct PyXorName { #[pymethods] impl PyDataMap { #[new] + /// Create a new DataMap from chunk information. + /// + /// Args: + /// chunk_infos: List of tuples containing (index, dst_hash, src_hash, src_size) + /// + /// Returns: + /// DataMap: A new data map instance fn new(chunk_infos: Vec<(usize, Vec, Vec, usize)>) -> Self { let infos = chunk_infos .into_iter() @@ -48,6 +75,14 @@ impl PyDataMap { } #[staticmethod] + /// Create a new DataMap with a child level. + /// + /// Args: + /// chunk_infos: List of tuples containing (index, dst_hash, src_hash, src_size) + /// child: The child level for this data map + /// + /// Returns: + /// DataMap: A new data map instance with the specified child level fn with_child(chunk_infos: Vec<(usize, Vec, Vec, usize)>, child: usize) -> Self { let infos = chunk_infos .into_iter() @@ -63,18 +98,35 @@ impl PyDataMap { } } + /// Get the child level of this data map. + /// + /// Returns: + /// Optional[int]: The child level if this is a child data map, None otherwise fn child(&self) -> Option { self.inner.child() } + /// Check if this is a child data map. + /// + /// Returns: + /// bool: True if this is a child data map, False otherwise fn is_child(&self) -> bool { self.inner.is_child() } + /// Get the number of chunks in this data map. + /// + /// Returns: + /// int: The number of chunks fn len(&self) -> usize { self.inner.len() } + /// Get information about all chunks in this data map. + /// + /// Returns: + /// List[Tuple[int, bytes, bytes, int]]: List of tuples containing + /// (index, dst_hash, src_hash, src_size) for each chunk fn infos(&self) -> Vec<(usize, Vec, Vec, usize)> { self.inner .infos() @@ -94,6 +146,13 @@ impl PyDataMap { #[pymethods] impl PyEncryptedChunk { #[new] + /// Create a new EncryptedChunk from bytes. + /// + /// Args: + /// content (bytes): The encrypted content + /// + /// Returns: + /// EncryptedChunk: A new encrypted chunk instance fn new(content: Vec) -> Self { Self { inner: RustEncryptedChunk { @@ -102,11 +161,22 @@ impl PyEncryptedChunk { } } + /// Get the content of this chunk. + /// + /// Returns: + /// bytes: The encrypted content fn content(&self) -> &[u8] { &self.inner.content } #[classmethod] + /// Create a new EncryptedChunk from Python bytes. + /// + /// Args: + /// content (bytes): The encrypted content + /// + /// Returns: + /// EncryptedChunk: A new encrypted chunk instance fn from_bytes(_cls: &PyType, content: &PyBytes) -> PyResult { Ok(Self::new(content.as_bytes().to_vec())) } @@ -134,6 +204,16 @@ impl PyXorName { } #[pyfunction] +/// Encrypt data in memory. +/// +/// Args: +/// data (bytes): The data to encrypt +/// +/// Returns: +/// Tuple[DataMap, List[EncryptedChunk]]: The data map and list of encrypted chunks +/// +/// Raises: +/// ValueError: If encryption fails fn encrypt(_py: Python<'_>, data: &PyBytes) -> PyResult<(PyDataMap, Vec)> { let bytes = Bytes::from(data.as_bytes().to_vec()); let (data_map, chunks) = rust_encrypt(bytes) @@ -149,6 +229,17 @@ fn encrypt(_py: Python<'_>, data: &PyBytes) -> PyResult<(PyDataMap, Vec PyResult<(PyDataMap, Vec)> { let (data_map, chunk_names) = rust_encrypt_from_file(&PathBuf::from(input_path), &PathBuf::from(output_dir)) @@ -164,6 +255,17 @@ fn encrypt_from_file(input_path: String, output_dir: String) -> PyResult<(PyData } #[pyfunction] +/// Decrypt data using provided chunks. +/// +/// Args: +/// data_map (DataMap): The data map containing chunk information +/// chunks (List[EncryptedChunk]): The encrypted chunks +/// +/// Returns: +/// bytes: The decrypted data +/// +/// Raises: +/// ValueError: If decryption fails fn decrypt(data_map: &PyDataMap, chunks: Vec) -> PyResult> { let chunks: Vec = chunks.into_iter().map(|c| c.inner).collect(); let result = rust_decrypt(&data_map.inner, &chunks) @@ -173,6 +275,15 @@ fn decrypt(data_map: &PyDataMap, chunks: Vec) -> PyResult, data_map: &PyDataMap, @@ -195,6 +306,19 @@ fn decrypt_from_storage( } #[pyfunction] +/// Shrink a data map by recursively encrypting it. +/// +/// This is useful for handling large files that produce large data maps. +/// +/// Args: +/// data_map (DataMap): The data map to shrink +/// store_chunk (Callable[[str, bytes], None]): Function to store new chunks +/// +/// Returns: +/// Tuple[DataMap, List[EncryptedChunk]]: The shrunk data map and new chunks +/// +/// Raises: +/// ValueError: If shrinking fails fn shrink_data_map( py: Python<'_>, data_map: &PyDataMap, @@ -222,6 +346,17 @@ fn shrink_data_map( } #[pyfunction] +/// Decrypt data using parallel chunk retrieval. +/// +/// This function is optimized for performance with large files. +/// +/// Args: +/// data_map (DataMap): The data map containing chunk information +/// output_path (str): Path to write the decrypted data +/// get_chunks (Callable[[List[str]], List[bytes]]): Function to retrieve chunks in parallel +/// +/// Raises: +/// ValueError: If decryption fails fn streaming_decrypt_from_storage( py: Python<'_>, data_map: &PyDataMap,