Skip to content

Commit

Permalink
feat: expose XorName type in Python bindings
Browse files Browse the repository at this point in the history
- Created Python wrapper for XorName type
- Added methods for creating XorName from content, bytes, and hex
- Added conversion methods to bytes and hex
- Added comprehensive tests for XorName functionality
- Updated project structure to use modular Python bindings
  • Loading branch information
dirvine committed Dec 17, 2024
1 parent 3bd5314 commit 8aaa6e3
Show file tree
Hide file tree
Showing 10 changed files with 459 additions and 133 deletions.
34 changes: 17 additions & 17 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
authors = ["MaidSafe Developers <dev@maidsafe.net>"]
description = "Self encrypting files (convergent encryption plus obfuscation)"
documentation = "https://docs.rs/self_encryption"
edition = "2018"
edition = "2021"
homepage = "https://maidsafe.net"
license = "GPL-3.0"
license = "MIT OR BSD-3-Clause"
name = "self_encryption"
readme = "README.md"
repository = "https://github.com/maidsafe/self_encryption"
Expand All @@ -15,33 +15,30 @@ default = []
python = ["pyo3/extension-module"]

[dependencies]
aes = "~0.8.1"
aes = "0.8.3"
bincode = "~1.3.3"
hex = "~0.4.3"
bytes = "1.5.0"
cbc = { version = "0.1.2", features = ["alloc"] }
hex = "0.4.3"
lazy_static = "1.4.0"
rand = "~0.8.5"
log = "0.4.20"
memmap2 = "0.9.4"
rand = "0.8.5"
rand_chacha = "~0.3.1"
rayon = "1.5.1"
thiserror = "1.0"
sha2 = "0.10.8"
tempfile = "3.10.1"
thiserror = "1.0.57"
num_cpus = "1.13.0"
itertools = "~0.10.0"
tempfile = "3.6.0"
xor_name = "5.0.0"
pyo3 = { version = "=0.20.3", optional = true, features = ["extension-module"] }
pyo3 = { version = "0.21.0", optional = true, features = ["extension-module", "abi3-py38", "abi3"] }

[dependencies.brotli]
version = "~3.3.0"
default-features = false
features = ["std"]

[dependencies.cbc]
version = "~0.1.1"
features = ["alloc", "block-padding"]

[dependencies.bytes]
version = "1.1.0"
features = ["serde"]

[dependencies.serde]
version = "1.0.136"
features = ["derive"]
Expand All @@ -55,7 +52,10 @@ version = "1.34.0"
features = ["rt"]

[dev-dependencies]
criterion = "~0.3"
criterion = "0.5.1"
env_logger = "0.11.2"
itertools = "0.12.1"
walkdir = "2.4.0"
docopt = "~0.9.0"
clap = { version = "4.4", features = ["derive"] }

Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ build-backend = "maturin"

[tool.maturin]
features = ["python"]
module-name = "_self_encryption"
module-name = "self_encryption._self_encryption"
bindings = "pyo3"
develop = true
manifest-path = "Cargo.toml"
python-packages = ["self_encryption"]
python-source = "."
compatibility = "manylinux2014"

[project]
Expand Down
126 changes: 12 additions & 114 deletions self_encryption/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,142 +4,40 @@
This library provides a secure way to encrypt data that supports deduplication while
maintaining strong security through content obfuscation and chunk interdependencies.
Key Features:
- Content-based chunking for deduplication
- Convergent encryption with obfuscation
- Self-validating chunks through content hashing
- Streaming operations for large files
- Parallel chunk processing
- Both in-memory and file-based operations
- Command-line interface for all operations
Basic Usage:
>>> from self_encryption import encrypt, decrypt
>>> data = b"Hello, World!" * 1000 # Must be at least 3072 bytes
>>> data_map, chunks = encrypt(data)
>>> decrypted = decrypt(data_map, chunks)
>>> assert data == decrypted
File Operations:
>>> from pathlib import Path
>>> from self_encryption import encrypt_from_file, decrypt_from_storage
>>> data_map, chunk_names = encrypt_from_file("input.dat", "chunks/")
>>> def get_chunk(hash_hex):
... return (Path("chunks") / hash_hex).read_bytes()
>>> decrypt_from_storage(data_map, "output.dat", get_chunk)
Streaming Operations:
>>> from self_encryption import streaming_encrypt_from_file
>>> def store_chunk(name, content):
... (Path("chunks") / name).write_bytes(content)
>>> data_map = streaming_encrypt_from_file("large_file.dat", store_chunk)
>>> print(f"Created {data_map.len()} chunks")
Command Line Usage:
The library includes a command-line interface for all operations:
# Encrypt a file
$ self-encryption encrypt-file input.dat chunks/
# Decrypt a file
$ self-encryption decrypt-file data_map.json chunks/ output.dat
# Verify a chunk
$ self-encryption verify chunks/abc123.dat
# Shrink a data map
$ self-encryption shrink data_map.json chunks/ optimized_map.json
For more information about CLI commands:
$ self-encryption --help
Classes:
DataMap - Contains metadata about encrypted chunks
Methods:
new(chunk_infos) -> DataMap
with_child(chunk_infos, child) -> DataMap
child() -> Optional[int]
is_child() -> bool
new() -> DataMap
original_file_size() -> int
len() -> int
infos() -> List[Tuple[int, bytes, bytes, int]]
is_child() -> bool
serialize() -> bytes
deserialize(data: bytes) -> DataMap
EncryptedChunk - Represents an encrypted chunk of data
Methods:
new(content: bytes) -> EncryptedChunk
content() -> bytes
from_bytes(content: bytes) -> EncryptedChunk
XorName - Content-addressed names for chunks
XorName - A 256-bit name used for addressing chunks
Methods:
new(bytes) -> XorName
from_content(content) -> XorName
as_bytes() -> bytes
Functions:
encrypt(data: bytes) -> Tuple[DataMap, List[EncryptedChunk]]
Encrypt data in memory, returning a data map and encrypted chunks.
The input data must be at least 3072 bytes.
encrypt_from_file(input_path: str, output_dir: str) -> Tuple[DataMap, List[str]]
Encrypt a file and store chunks to disk. Returns a data map and chunk names.
The input file must be at least 3072 bytes.
streaming_encrypt_from_file(input_path: str, store_chunk: Callable[[str, bytes], None]) -> DataMap
Stream-encrypt a file and store chunks using a custom storage backend.
Memory efficient for large files. Returns only the data map.
decrypt(data_map: DataMap, chunks: List[EncryptedChunk]) -> bytes
Decrypt data using provided chunks in memory.
decrypt_from_storage(data_map: DataMap, output_path: str, get_chunk: Callable) -> None
Decrypt data using chunks from storage, writing directly to a file.
Suitable for files that can fit in memory.
streaming_decrypt_from_storage(data_map: DataMap, output_path: str, get_chunks: Callable) -> None
Decrypt data using parallel chunk retrieval for improved performance.
Optimized for large files and remote storage backends.
Retrieves multiple chunks in parallel for better throughput.
shrink_data_map(data_map: DataMap, store_chunk: Callable) -> Tuple[DataMap, List[EncryptedChunk]]
Shrink a data map by recursively encrypting it. Useful for large files.
verify_chunk(name: XorName, content: bytes) -> EncryptedChunk
Verify the integrity of an encrypted chunk.
For more detailed documentation about specific functions or classes:
>>> help(self_encryption.DataMap)
>>> help(self_encryption.encrypt)
new(content: bytes) -> XorName
from_bytes(bytes: bytes) -> XorName
from_hex(hex_str: str) -> XorName
to_bytes() -> bytes
to_hex() -> str
"""

from ._self_encryption import (
DataMap,
EncryptedChunk,
XorName,
encrypt,
encrypt_from_file,
decrypt,
decrypt_from_storage,
shrink_data_map,
streaming_decrypt_from_storage,
verify_chunk,
streaming_encrypt_from_file,
)

from .cli import cli

__version__ = "0.32.2"
__version__ = "0.32.4"

__all__ = [
"DataMap",
"EncryptedChunk",
"XorName",
"encrypt",
"encrypt_from_file",
"decrypt",
"decrypt_from_storage",
"shrink_data_map",
"streaming_decrypt_from_storage",
"verify_chunk",
"streaming_encrypt_from_file",
"cli",
]
52 changes: 52 additions & 0 deletions src/python/data_map.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// src/python/data_map.rs
pub mod py_data_map;

use pyo3::prelude::*;
use crate::DataMap;
use crate::data_map::py_data_map::PyDataMap;

#[pymethods]
impl PyDataMap {
/// Create a new empty DataMap
#[new]
pub fn new() -> Self {
Self {
inner: DataMap::new(Vec::new()),
}
}

/// Get the original file size
#[getter]
pub fn original_file_size(&self) -> usize {
self.inner.original_file_size()
}

/// Get the number of chunks
#[getter]
pub fn chunk_count(&self) -> usize {
self.inner.len()
}

/// Check if this is a child data map
pub fn is_child(&self) -> bool {
self.inner.is_child()
}

pub fn __str__(&self) -> String {
format!("DataMap(file_size={}, chunks={})", self.original_file_size(), self.chunk_count())
}

/// Serialize the data map to bytes
pub fn serialize(&self) -> PyResult<Vec<u8>> {
bincode::serialize(&self.inner)
.map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(e.to_string()))
}

/// Create a data map from serialized bytes
#[staticmethod]
pub fn deserialize(data: Vec<u8>) -> PyResult<Self> {
bincode::deserialize(&data)
.map(|inner| Self { inner })
.map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(e.to_string()))
}
}
48 changes: 48 additions & 0 deletions src/python/decryptor.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
use pyo3::prelude::*;
use std::path::PathBuf;
use crate::StreamSelfDecryptor;
use super::{PyDataMap, PyEncryptedChunk};

/// Python-facing handle around a `StreamSelfDecryptor`.
#[pyclass]
pub struct PyStreamSelfDecryptor {
    inner: StreamSelfDecryptor,
}

#[pymethods]
impl PyStreamSelfDecryptor {
    /// Set up a streaming decryptor for `file_path`, driven by `data_map`.
    ///
    /// Args:
    ///     file_path (str): Path handed to the underlying decryptor
    ///     data_map (PyDataMap): Data map describing the encrypted content
    ///
    /// Raises:
    ///     ValueError: If the underlying decryptor cannot be created
    #[new]
    pub fn new(file_path: String, data_map: &PyDataMap) -> PyResult<Self> {
        let target = PathBuf::from(file_path);
        StreamSelfDecryptor::decrypt_to_file(target, &data_map.inner)
            .map(|inner| Self { inner })
            .map_err(|err| {
                PyErr::new::<pyo3::exceptions::PyValueError, _>(format!(
                    "Failed to create decryptor: {}",
                    err
                ))
            })
    }

    /// Feed the next encrypted chunk to the decryptor.
    ///
    /// Args:
    ///     encrypted_chunk (PyEncryptedChunk): The chunk to process
    ///
    /// Returns:
    ///     bool: The completion flag reported by the underlying decryptor
    ///
    /// Raises:
    ///     ValueError: If the underlying decryptor reports an error
    pub fn next_encrypted(&mut self, encrypted_chunk: PyEncryptedChunk) -> PyResult<bool> {
        self.inner
            .next_encrypted(encrypted_chunk.inner)
            .map_err(|err| {
                PyErr::new::<pyo3::exceptions::PyValueError, _>(format!(
                    "Decryption failed: {}",
                    err
                ))
            })
    }

    /// Return the decryptor's file path as a string.
    ///
    /// The conversion is lossy: path bytes that are not valid Unicode are
    /// replaced rather than reported as an error.
    ///
    /// Returns:
    ///     str: The path held by the underlying decryptor
    pub fn file_path(&self) -> String {
        let path = self.inner.file_path();
        path.to_string_lossy().into_owned()
    }
}
30 changes: 30 additions & 0 deletions src/python/encrypted_chunk.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// src/python/encrypted_chunk.rs
use pyo3::prelude::*;
use bytes::Bytes;
use crate::EncryptedChunk;

/// Python-facing handle around an `EncryptedChunk`.
#[pyclass]
#[derive(Clone)]
pub struct PyEncryptedChunk {
    // Crate-visible so sibling binding modules can reach the wrapped chunk.
    pub(crate) inner: EncryptedChunk,
}

#[pymethods]
impl PyEncryptedChunk {
    /// Wrap the given raw bytes as the content of a new encrypted chunk.
    #[new]
    pub fn new(content: Vec<u8>) -> Self {
        let content = Bytes::from(content);
        let inner = EncryptedChunk { content };
        Self { inner }
    }

    /// Copy of the chunk's content bytes.
    ///
    /// Exposed to Python as a read-only property; each access allocates a
    /// fresh copy of the data.
    #[getter]
    pub fn content(&self) -> Vec<u8> {
        let raw: &[u8] = &self.inner.content;
        raw.to_vec()
    }
}
Loading

0 comments on commit 8aaa6e3

Please sign in to comment.