diff --git a/.gitignore b/.gitignore index a77a6a94d..e67cc83f4 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,4 @@ Cargo.lock *.pdb # Ignore the client dir. It's for local experimental testing -client/ \ No newline at end of file +# client \ No newline at end of file diff --git a/README.md b/README.md index 1830b7cf9..dd4fbddd2 100644 --- a/README.md +++ b/README.md @@ -211,7 +211,7 @@ For communication, SwarmNL leverages the powerful capabilities of libp2p. These // Prepare a RPC fetch request let fetch_key = vec!["SomeFetchKey".as_bytes().to_vec()]; - let fetch_request = AppData::FetchData { + let fetch_request = AppData::SendRpc { keys: fetch_key.clone(), peer: node4_peer_id, }; @@ -222,7 +222,7 @@ For communication, SwarmNL leverages the powerful capabilities of libp2p. These // Poll for the result if let Ok(result) = node.recv_from_network(stream_id).await { // Here, the request data was simply echoed by the remote peer - assert_eq!(AppResponse::FetchData(fetch_key), result); + assert_eq!(AppResponse::SendRpc(fetch_key), result); } // c. Gossiping e.g diff --git a/client/Cargo.toml b/client/Cargo.toml index 708e40df7..ff47f7363 100644 --- a/client/Cargo.toml +++ b/client/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "model" +name = "node_cloning_model" version = "0.2.0" edition = "2021" @@ -7,7 +7,7 @@ edition = "2021" [dependencies] rust-ini = "0.20.0" -swarm_nl = { path = "../swarm_nl", features = ["async-std-runtime"] } +swarm_nl = { path = "../swarm_nl", features = ["tokio-runtime"] } tokio = { version = "1.37.0", features = ["full"] } # async-std = { version = "1.12", features = ["attributes"] } rand = "0.8.5" diff --git a/client/src/main.rs b/client/src/main.rs index 4de533489..3d8dbd534 100644 --- a/client/src/main.rs +++ b/client/src/main.rs @@ -1,40 +1,42 @@ //! Copyright 2024 Algorealm, Inc. -//! This example models a distributed network and is a rough sketch to help in determining concrete -//! features to be built +//! 
Range-based Sharding Example. +//! This example demonstrates the sharding configurations and capabilities of +//! SwarmNL. Here we will be implementing a range-based sharding policy. Also, it is important to +//! note that replication must be configured for sharding to take place. It is a prerequisite. +//! If no replication is configured explicitly, then the default replication behaviour will be +//! assumed. use std::{ - collections::HashMap, + collections::{BTreeMap, HashMap, VecDeque}, io::{self, Write}, + sync::Arc, time::Duration, }; use swarm_nl::{ - core::{gossipsub_cfg::GossipsubConfig, Core, CoreBuilder, NetworkEvent, RpcConfig}, + core::{ + gossipsub_cfg::GossipsubConfig, + replication::{ConsensusModel, ConsistencyModel, ReplNetworkConfig}, + sharding::Sharding, + ByteVector, Core, CoreBuilder, NetworkEvent, RpcConfig, + }, setup::BootstrapConfig, - Keypair, MessageId, PeerId, Port, ReplConfigData, + Keypair, MessageId, MultiaddrString, PeerId, PeerIdString, Port, }; +use tokio::sync::Mutex; -/// The id of the gossip mesh network -pub const GOSSIP_NETWORK: &str = "random"; - -/// The name/key of the replication network -pub const REPLICA_NETWORK_KEY: &str = "@@___REPLICA"; - -/// Amount of time to wait for proper sync -pub const WAIT_TIME: u64 = 3; +/// The constant that represents the id of the sharding network. Should be kept as a secret. 
+pub const NETWORK_SHARDING_ID: &'static str = "sharding_xx"; +/// The time to wait for events, if necessary +pub const WAIT_TIME: u64 = 2; /// Handle incoming RPC fn rpc_incoming_message_handler(data: Vec>) -> Vec> { - // Return verbatim + // Just return incomding data data } -// Handle incoming replication data -fn repl_msg_handler(data: &Vec) { - println!("Incoming >> {:?}", data); -} - /// Handle gissiping fn gossipsub_filter_fn( propagation_source: PeerId, @@ -46,70 +48,106 @@ fn gossipsub_filter_fn( true } -/// Create a deterministic node -async fn setup_node_1( - ports: (Port, Port), - deterministic_protobuf: &[u8], - repl_cfg: ReplConfigData, -) -> Core { - // Configure the node deterministically so we can connect to it - let mut protobuf = &mut deterministic_protobuf.to_owned()[..]; +// Implement the `Sharding` trait +/// Range-based sharding implementation +pub struct RangeSharding +where + T: ToString + Send + Sync, +{ + /// A map where the key represents the upper bound of a range, and the value is the + /// corresponding shard ID + ranges: BTreeMap, +} - let config = BootstrapConfig::default() - .generate_keypair_from_protobuf("ed25519", &mut protobuf) - .with_tcp(ports.0) - .with_udp(ports.1) - // Configure replication data - .with_replication(repl_cfg); +impl RangeSharding +where + T: ToString + Send + Sync, +{ + /// Creates a new RangeSharding instance + pub fn new(ranges: BTreeMap) -> Self { + Self { ranges } + } +} - // Set up network - let builder = CoreBuilder::with_config(config) - .with_rpc(RpcConfig::Default, rpc_incoming_message_handler) - .with_replication(REPLICA_NETWORK_KEY.to_string(), repl_msg_handler); +impl Sharding for RangeSharding +where + T: ToString + Send + Sync + Clone, +{ + type Key = u64; + type ShardId = T; + + /// Locate the shard corresponding to the given key + fn locate_shard(&self, key: &Self::Key) -> Option { + // Find the first range whose upper bound is greater than or equal to the key + self.ranges + .iter() + 
.find(|(&upper_bound, _)| key <= &upper_bound) + .map(|(_, shard_id)| shard_id.clone()) + } +} - // Configure gossipsub - // Specify the gossip filter algorithm - let filter_fn = gossipsub_filter_fn; - let builder = builder.with_gossipsub(GossipsubConfig::Default, filter_fn); +/// Function to respond to a shards request to read data off a node explicitly (data forwarding +/// fetch request) +fn shard_request_handler(req: ByteVector) -> ByteVector { + // Return the request + some additional data + let mut response = req[0].clone(); + response.push(b'@'); - // Finish build - builder.build().await.unwrap() + vec![response] } -/// Create a detereministic node -async fn setup_node(ports: (Port, Port), deterministic_protobuf: &[u8]) -> Core { +// Create a determininstic node +async fn setup_node( + ports: (Port, Port), + deterministic_protobuf: &[u8], + boot_nodes: HashMap, +) -> Core { // Configure the node deterministically so we can connect to it let mut protobuf = &mut deterministic_protobuf.to_owned()[..]; let config = BootstrapConfig::default() .generate_keypair_from_protobuf("ed25519", &mut protobuf) .with_tcp(ports.0) - .with_udp(ports.1); + .with_udp(ports.1) + // configure bootnodes, so we can connect to our sister nodes + .with_bootnodes(boot_nodes); // Set up network - let builder = CoreBuilder::with_config(config) - // Configure RPC handling - .with_rpc(RpcConfig::Default, rpc_incoming_message_handler) - // Configure replication handler - .with_replication(REPLICA_NETWORK_KEY.to_string(), repl_msg_handler); + let mut builder = CoreBuilder::with_config(config); + + // Configure RPC handling + builder = builder.with_rpc(RpcConfig::Default, rpc_incoming_message_handler); // Configure gossipsub // Specify the gossip filter algorithm let filter_fn = gossipsub_filter_fn; let builder = builder.with_gossipsub(GossipsubConfig::Default, filter_fn); - // Finish build - builder.build().await.unwrap() + // Configure node for replication, we will be using a strong 
consistency model here + let repl_config = ReplNetworkConfig::Custom { + queue_length: 150, + expiry_time: Some(10), + sync_wait_time: 5, + consistency_model: ConsistencyModel::Strong(ConsensusModel::All), + data_aging_period: 2, + }; + + builder + .with_replication(repl_config) + .with_sharding(NETWORK_SHARDING_ID.into(), shard_request_handler) + .build() + .await + .unwrap() } // #[cfg(feature = "first-node")] async fn run_node( + name: &str, ports_1: (Port, Port), ports_2: (Port, Port), ports_3: (Port, Port), peer_ids: (PeerId, PeerId), keypair: [u8; 68], - node_1: bool, ) { // Bootnodes let mut bootnodes = HashMap::new(); @@ -118,26 +156,17 @@ async fn run_node( format!("/ip4/127.0.0.1/tcp/{}", ports_2.0), ); - // bootnodes.insert( - // peer_ids.1.to_base58(), - // format!("/ip4/127.0.0.1/tcp/{}", ports_3.0), - // ); - - // Configure replication - let mut repl_cfg = HashMap::new(); - repl_cfg.insert(String::from(REPLICA_NETWORK_KEY), bootnodes.clone()); + bootnodes.insert( + peer_ids.1.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_3.0), + ); - let mut node = if node_1 { - setup_node_1(ports_1, &keypair[..], vec![repl_cfg]).await - } else { - setup_node(ports_1, &keypair[..]).await - }; + // Setup node + let mut node = setup_node(ports_1, &keypair[..], bootnodes).await; // Wait a little for setup and connections tokio::time::sleep(Duration::from_secs(WAIT_TIME)).await; - println!("{:?}", node.next_event().await); - // Read events generated at setup while let Some(event) = node.next_event().await { match event { @@ -163,33 +192,183 @@ async fn run_node( } } - loop { - // Create a buffer to store the input - let mut input = String::new(); + // Local storage + let local_storage = Arc::new(Mutex::new(VecDeque::new())); + + // Spin up a task to listen for replication events + let new_node = node.clone(); + let storage = local_storage.clone(); + tokio::task::spawn(async move { + let mut node = new_node.clone(); + loop { + // Check for incoming data events + if 
let Some(event) = node.next_event().await { + // Check for only incoming repl data + if let NetworkEvent::IncomingForwardedData { data, source } = event { + println!( + "Recieved forwarded data: {:?} from peer: {}", + data, + source.to_base58() + ); + + storage.lock().await.push_back(data[0].clone()); + } + } - // Prompt the user for input - print!("Enter some text (type 'exit' to quit): "); - io::stdout().flush().unwrap(); // Flush to ensure the prompt is displayed + // Sleep + tokio::time::sleep(Duration::from_secs(WAIT_TIME)).await; + } + }); + + // Shard Id's + let shard_id_1 = 1; + let shard_id_2 = 2; + let shard_id_3 = 3; + + // Define shard ranges (Key ranges => Shard id) + let mut ranges = BTreeMap::new(); + ranges.insert(100, shard_id_1); + ranges.insert(200, shard_id_2); + ranges.insert(300, shard_id_3); + + // Initialize the range-based sharding policy + let shard_manager = RangeSharding::new(ranges); + + // Join appropriate shards each + match name { + "Node 1" => { + if shard_manager + .join_network(node.clone(), &shard_id_1) + .await + .is_ok() + { + println!("Successfully joined shard: {}", shard_id_1); + } + }, + "Node 2" => { + if shard_manager + .join_network(node.clone(), &shard_id_2) + .await + .is_ok() + { + println!("Successfully joined shard: {}", shard_id_2); + } + }, + "Node 3" => { + if shard_manager + .join_network(node.clone(), &shard_id_3) + .await + .is_ok() + { + println!("Successfully joined shard: {}", shard_id_3); + } + }, + _ => {}, + } - // Read from stdin - match io::stdin().read_line(&mut input) { - Ok(_) => { - let trimmed_input = input.trim(); // Trim the input to remove newline characters + // Menu section + println!("\n==================="); + println!("Sharding Example Menu"); + println!("Usage:"); + println!("shard - Place data in appropriate shard"); + println!("Fetch - Request data from the network"); + println!("read - Read data stored locally on this shard"); + println!("exit - Exit the application"); - // Check if 
the user wants to exit - if trimmed_input.eq_ignore_ascii_case("exit") { - println!("Exiting program. Goodbye!"); - break; + loop { + // Read user input + let mut input = String::new(); + print!("> "); + + // Flush stdout to display prompt + io::stdout().flush().unwrap(); + io::stdin().read_line(&mut input).unwrap(); + + // Trim input and split into parts + let mut parts = input.trim().split_whitespace(); + let command = parts.next(); + let data = parts.collect::>(); + + // Match the first word and take action + match command { + Some("shard") => { + if data.len() >= 2 { + if let Ok(key) = data[0].parse::() { + let shard_data = &data[1..].join(" "); + println!("Sharding data with key '{}': {}...", key, shard_data); + + // Shard data across the network + match shard_manager + .shard(node.clone(), &key, vec![(*shard_data).clone().into()]) + .await + { + Ok(response) => match response { + Some(data) => { + println!( + "The data to shard is '{}'.", + String::from_utf8_lossy(&data[0]) + ); + println!("It falls into the range of the current node and will be stored locally."); + + // Save locally + local_storage + .lock() + .await + .push_back(String::from_utf8_lossy(&data[0]).to_string()); + }, + None => println!("Successfully placed data in the right shard."), + }, + Err(e) => println!("Sharding failed: {}", e.to_string()), + } + } else { + println!("Error: 'key' must be a u64"); + } + } else { + println!("Error: 'shard' command requires at least a key and data."); } - - // Replicate accross replica network - node.replicate(vec![trimmed_input.clone().into()], REPLICA_NETWORK_KEY) - .await; }, - Err(e) => { - // Handle errors if any - eprintln!("Failed to read input: {}", e); + Some("fetch") => { + if data.len() >= 2 { + if let Ok(key) = data[0].parse::() { + let request = &data[1..].join(" "); + println!("Requesting data with key '{}': {}", key, request); + + // Fetch data from network + match shard_manager + .fetch(node.clone(), &key, vec![(*request).clone().into()]) + 
.await + { + Ok(response) => match response { + Some(data) => { + println!( + "The response data is '{}'", + String::from_utf8_lossy(&data[0]) + ); + println!("Successfully pulled data from the network."); + }, + None => println!("Data exists locally on node."), + }, + Err(e) => println!("Fetching failed: {}", e.to_string()), + } + } else { + println!("Error: 'key' must be a u64"); + } + } else { + println!("Error: 'fetch' command requires at least a key and request data."); + } }, + Some("read") => { + println!("Local storage data:"); + while let Some(data) = local_storage.lock().await.pop_front() { + println!("- {data}"); + } + }, + Some("exit") => { + println!("Exiting the application. Goodbye!"); + break; + }, + Some(unknown) => println!("Unknown command: '{}'. Please try again.", unknown), + None => println!("No command entered. Please try again."), } } } @@ -237,58 +416,49 @@ async fn main() { .to_peer_id(); // Ports - let ports_1: (Port, Port) = (49152, 55003); + let ports_1: (Port, Port) = (49555, 55003); let ports_2: (Port, Port) = (49153, 55001); let ports_3: (Port, Port) = (49154, 55002); - // Spin up first server - #[cfg(feature = "first-node")] + // Spin up the coordinator node + #[cfg(feature = "third-node")] { run_node( + "Node 1", ports_1, ports_2, ports_3, (peer_id_2, peer_id_3), node_1_keypair, - true, ) .await; } - // Spin up second server + // Spin up second node #[cfg(feature = "second-node")] { run_node( + "Node 2", ports_2, ports_1, ports_3, (peer_id_1, peer_id_3), node_2_keypair, - false, ) .await; } - // Spin up third server - #[cfg(feature = "third-node")] + // Spin up third node + #[cfg(feature = "first-node")] { run_node( + "Node 3", ports_3, ports_1, ports_2, (peer_id_1, peer_id_2), node_3_keypair, - false, ) .await; } - - // Spin up client - #[cfg(feature = "client")] - { - // Initialize client - let file_name = "client/file.txt"; - // Run - util::run_client(file_name, http_ports).await; - } } diff --git a/tutorials/README.md 
b/examples/README.md similarity index 100% rename from tutorials/README.md rename to examples/README.md diff --git a/tutorials/echo_server/Cargo.toml b/examples/echo_server/Cargo.toml similarity index 100% rename from tutorials/echo_server/Cargo.toml rename to examples/echo_server/Cargo.toml diff --git a/tutorials/echo_server/Dockerfile b/examples/echo_server/Dockerfile similarity index 100% rename from tutorials/echo_server/Dockerfile rename to examples/echo_server/Dockerfile diff --git a/tutorials/echo_server/README.md b/examples/echo_server/README.md similarity index 100% rename from tutorials/echo_server/README.md rename to examples/echo_server/README.md diff --git a/tutorials/echo_server/src/main.rs b/examples/echo_server/src/main.rs similarity index 100% rename from tutorials/echo_server/src/main.rs rename to examples/echo_server/src/main.rs diff --git a/tutorials/file_sharing_app/Cargo.toml b/examples/file_sharing_app/Cargo.toml similarity index 100% rename from tutorials/file_sharing_app/Cargo.toml rename to examples/file_sharing_app/Cargo.toml diff --git a/tutorials/file_sharing_app/Dockerfile b/examples/file_sharing_app/Dockerfile similarity index 100% rename from tutorials/file_sharing_app/Dockerfile rename to examples/file_sharing_app/Dockerfile diff --git a/tutorials/file_sharing_app/README.md b/examples/file_sharing_app/README.md similarity index 99% rename from tutorials/file_sharing_app/README.md rename to examples/file_sharing_app/README.md index 0f451cbfa..9d86c2dff 100644 --- a/tutorials/file_sharing_app/README.md +++ b/examples/file_sharing_app/README.md @@ -287,7 +287,7 @@ async fn run_node_2() { let fetch_key = vec![value]; // prepare fetch request - let fetch_request = AppData::FetchData { + let fetch_request = AppData::SendRpc { keys: fetch_key.clone(), peer: node_1_peer_id.clone(), // The peer to query for data }; @@ -303,7 +303,7 @@ async fn run_node_2() { // Poll the network for the result if let Ok(response) = 
node_2.recv_from_network(stream_id).await { - if let AppResponse::FetchData(response_file) = response { + if let AppResponse::SendRpc(response_file) = response { // Get the file let file = response_file[0].clone(); diff --git a/tutorials/file_sharing_app/bootstrap_config.ini b/examples/file_sharing_app/bootstrap_config.ini similarity index 100% rename from tutorials/file_sharing_app/bootstrap_config.ini rename to examples/file_sharing_app/bootstrap_config.ini diff --git a/tutorials/file_sharing_app/run_both_nodes.sh b/examples/file_sharing_app/run_both_nodes.sh similarity index 100% rename from tutorials/file_sharing_app/run_both_nodes.sh rename to examples/file_sharing_app/run_both_nodes.sh diff --git a/tutorials/file_sharing_app/src/main.rs b/examples/file_sharing_app/src/main.rs similarity index 98% rename from tutorials/file_sharing_app/src/main.rs rename to examples/file_sharing_app/src/main.rs index 009d72693..f7ba261bd 100644 --- a/tutorials/file_sharing_app/src/main.rs +++ b/examples/file_sharing_app/src/main.rs @@ -255,7 +255,7 @@ async fn run_node_2() { let fetch_key = vec![value]; // prepare fetch request - let fetch_request = AppData::FetchData { + let fetch_request = AppData::SendRpc { keys: fetch_key.clone(), peer: node_1_peer_id.clone(), // The peer to query for data }; @@ -271,7 +271,7 @@ async fn run_node_2() { // Poll the network for the result if let Ok(response) = node_2.recv_from_network(stream_id).await { - if let AppResponse::FetchData(response_file) = response { + if let AppResponse::SendRpc(response_file) = response { // Get the file let file = response_file[0].clone(); diff --git a/examples/replication/eventual_consistency/Cargo.toml b/examples/replication/eventual_consistency/Cargo.toml new file mode 100644 index 000000000..edb04f28a --- /dev/null +++ b/examples/replication/eventual_consistency/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "eventual_consistency_model" +version = "0.2.0" +edition = "2021" + +# See more keys and their 
definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +rust-ini = "0.20.0" +swarm_nl = { path = "../../../swarm_nl", features = ["async-std-runtime"] } +# tokio = { version = "1.37.0", features = ["full"] } +async-std = { version = "1.12", features = ["attributes"] } +rand = "0.8.5" +warp = "0.3.7" +reqwest = "0.12.8" + +[features] +client = [] +first-node = [] +second-node = [] +third-node = [] \ No newline at end of file diff --git a/sketch/client-server/bootstrap_config.ini b/examples/replication/eventual_consistency/bootstrap_config.ini similarity index 100% rename from sketch/client-server/bootstrap_config.ini rename to examples/replication/eventual_consistency/bootstrap_config.ini diff --git a/examples/replication/eventual_consistency/src/main.rs b/examples/replication/eventual_consistency/src/main.rs new file mode 100644 index 000000000..017d16b35 --- /dev/null +++ b/examples/replication/eventual_consistency/src/main.rs @@ -0,0 +1,301 @@ +//! Copyright 2024 Algorealm, Inc. + +//! This example demonstrates the replication of data accross nodes in a network using the +//! eventual data consistency synchronization model. Here we are spinning up three replica nodes that accept data +//! from standard input to read contents off the replica buffer or to immedately replicates the input data across its replica network. + +use std::{collections::HashMap, io::{self, Write}, time::Duration}; + +use swarm_nl::{ + core::{ + gossipsub_cfg::GossipsubConfig, + replication::{ConsensusModel, ConsistencyModel, ReplNetworkConfig}, + Core, CoreBuilder, NetworkEvent, RpcConfig, + }, + setup::BootstrapConfig, + Keypair, MessageId, MultiaddrString, PeerId, PeerIdString, Port, +}; + +/// The constant that represents the id of the replica network. 
Should be kept as a secret +pub const REPL_NETWORK_ID: &'static str = "replica_xx"; +/// The time to wait for events, if necessary +pub const WAIT_TIME: u64 = 2; + +/// Handle incoming RPC +fn rpc_incoming_message_handler(data: Vec>) -> Vec> { + // Just return incomding data + data +} + +/// Handle gissiping +fn gossipsub_filter_fn( + propagation_source: PeerId, + message_id: MessageId, + source: Option, + topic: String, + data: Vec, +) -> bool { + true +} + +// Create a determininstic node +async fn setup_node( + ports: (Port, Port), + deterministic_protobuf: &[u8], + boot_nodes: HashMap, +) -> Core { + // Configure the node deterministically so we can connect to it + let mut protobuf = &mut deterministic_protobuf.to_owned()[..]; + + let config = BootstrapConfig::default() + .generate_keypair_from_protobuf("ed25519", &mut protobuf) + .with_tcp(ports.0) + .with_udp(ports.1) + // configure bootnodes, so we can connect to our sister nodes + .with_bootnodes(boot_nodes); + + // Set up network + let mut builder = CoreBuilder::with_config(config); + + // Configure RPC handling + builder = builder.with_rpc(RpcConfig::Default, rpc_incoming_message_handler); + + // Configure gossipsub + // Specify the gossip filter algorithm + let filter_fn = gossipsub_filter_fn; + let builder = builder.with_gossipsub(GossipsubConfig::Default, filter_fn); + + // Configure node for replication, we will be using an eventual consistency model here + let repl_config = ReplNetworkConfig::Custom { + queue_length: 150, + expiry_time: Some(10), + sync_wait_time: 5, + consistency_model: ConsistencyModel::Eventual, + data_aging_period: 2, + }; + + builder.with_replication(repl_config).build().await.unwrap() +} + +// #[cfg(feature = "first-node")] +async fn run_node( + name: &str, + ports_1: (Port, Port), + ports_2: (Port, Port), + ports_3: (Port, Port), + peer_ids: (PeerId, PeerId), + keypair: [u8; 68], +) { + // Bootnodes + let mut bootnodes = HashMap::new(); + bootnodes.insert( + 
peer_ids.0.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_2.0), + ); + + bootnodes.insert( + peer_ids.1.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_3.0), + ); + + // Setup node + let mut node = setup_node(ports_1, &keypair[..], bootnodes).await; + + // Join replica network + println!("Joining replication network"); + if let Ok(_) = node.join_repl_network(REPL_NETWORK_ID.into()).await { + println!("Replica network successfully joined"); + } else { + panic!("Failed to join replica network"); + } + + // Wait a little for setup and connections + async_std::task::sleep(Duration::from_secs(WAIT_TIME)).await; + + // Read events generated at setup + while let Some(event) = node.next_event().await { + match event { + NetworkEvent::NewListenAddr { + local_peer_id, + listener_id: _, + address, + } => { + // Announce interfaces we're listening on + println!("Peer id: {}", local_peer_id); + println!("We're listening on {}", address); + }, + NetworkEvent::ConnectionEstablished { + peer_id, + connection_id: _, + endpoint: _, + num_established: _, + established_in: _, + } => { + println!("Connection established with peer: {:?}", peer_id); + }, + _ => {}, + } + } + + // Spin up a task to listen for replication events + let new_node = node.clone(); + async_std::task::spawn(async move { + let mut node = new_node.clone(); + loop { + // Check for incoming data events + if let Some(event) = node.next_event().await { + // Check for only incoming repl data + if let NetworkEvent::ReplicaDataIncoming { source, .. 
} = event { + println!("Recieved incoming replica data from {}", source.to_base58()); + } + } + + // Sleep + async_std::task::sleep(Duration::from_secs(WAIT_TIME)).await; + } + }); + + // Wait for some time for replication protocol intitialization across the network + async_std::task::sleep(Duration::from_secs(WAIT_TIME + 3)).await; + + println!("\n==================="); + println!("Replication Test Menu"); + println!("Usage:"); + println!("repl - Replicate to peers"); + println!("read - Read content from buffer"); + println!("exit - Exit the application"); + loop { + // Read user input + let mut input = String::new(); + print!("> "); + + io::stdout().flush().unwrap(); // Flush stdout to display prompt + io::stdin().read_line(&mut input).unwrap(); + + // Trim input and split into parts + let mut parts = input.trim().split_whitespace(); + let command = parts.next(); // Get the first word + let data = parts.collect::>().join(" "); // Collect the rest as data + + // Match the first word and take action + match command { + Some("repl") => { + if !data.is_empty() { + println!("Replicating data: {}", data); + // Replicate input + match node + .replicate(vec![data.into()], REPL_NETWORK_ID) + .await + { + Ok(_) => println!("Replication successful"), + Err(e) => println!("Replication failed: {}", e.to_string()), + } + } else { + println!("Error: No data provided to replicate."); + } + }, + Some("read") => { + println!("Reading contents from buffer..."); + while let Some(repl_data) = node.consume_repl_data(REPL_NETWORK_ID).await { + println!("Buffer Data: {}", repl_data.data[0],); + } + }, + Some("exit") => { + println!("Exiting the application. Goodbye!"); + break; + }, + Some(unknown) => println!("Unknown command: '{}'. Please try again.", unknown), + None => println!("No command entered. 
Please try again."), + } + } +} + +#[async_std::main] +async fn main() { + // Node 1 keypair + let node_1_keypair: [u8; 68] = [ + 8, 1, 18, 64, 34, 116, 25, 74, 122, 174, 130, 2, 98, 221, 17, 247, 176, 102, 205, 3, 27, + 202, 193, 27, 6, 104, 216, 158, 235, 38, 141, 58, 64, 81, 157, 155, 36, 193, 50, 147, 85, + 72, 64, 174, 65, 132, 232, 78, 231, 224, 88, 38, 55, 78, 178, 65, 42, 97, 39, 152, 42, 164, + 148, 159, 36, 170, 109, 178, + ]; + + // Node 2 Keypair + let node_2_keypair: [u8; 68] = [ + 8, 1, 18, 64, 37, 37, 86, 103, 79, 48, 103, 83, 170, 172, 131, 160, 15, 138, 237, 128, 114, + 144, 239, 7, 37, 6, 217, 25, 202, 210, 55, 89, 55, 93, 0, 153, 82, 226, 1, 54, 240, 36, + 110, 110, 173, 119, 143, 79, 44, 82, 126, 121, 247, 154, 252, 215, 43, 21, 101, 109, 235, + 10, 127, 128, 52, 52, 68, 31, + ]; + + // Node 3 Keypair + let node_3_keypair: [u8; 68] = [ + 8, 1, 18, 64, 211, 172, 68, 234, 95, 121, 188, 130, 107, 113, 212, 215, 211, 189, 219, 190, + 137, 91, 250, 222, 34, 152, 190, 117, 139, 199, 250, 5, 33, 65, 14, 180, 214, 5, 151, 109, + 184, 106, 73, 186, 126, 52, 59, 220, 170, 158, 195, 249, 110, 74, 222, 161, 88, 194, 187, + 112, 95, 131, 113, 251, 106, 94, 61, 177, + ]; + + // Get Peer Id's + let peer_id_1 = Keypair::from_protobuf_encoding(&node_1_keypair) + .unwrap() + .public() + .to_peer_id(); + + let peer_id_2 = Keypair::from_protobuf_encoding(&node_2_keypair) + .unwrap() + .public() + .to_peer_id(); + + let peer_id_3 = Keypair::from_protobuf_encoding(&node_3_keypair) + .unwrap() + .public() + .to_peer_id(); + + // Ports + let ports_1: (Port, Port) = (49555, 55003); + let ports_2: (Port, Port) = (49153, 55001); + let ports_3: (Port, Port) = (49154, 55002); + + // Spin up the coordinator node + #[cfg(feature = "third-node")] + { + run_node( + "Node 1", + ports_1, + ports_2, + ports_3, + (peer_id_2, peer_id_3), + node_1_keypair, + ) + .await; + } + + // Spin up second node + #[cfg(feature = "second-node")] + { + run_node( + "Node 2", + ports_2, + ports_1, 
+ ports_3, + (peer_id_1, peer_id_3), + node_2_keypair, + ) + .await; + } + + // Spin up third node + #[cfg(feature = "first-node")] + { + run_node( + "Node 3", + ports_3, + ports_1, + ports_2, + (peer_id_1, peer_id_2), + node_3_keypair, + ) + .await; + } +} diff --git a/sketch/client-server/Cargo.toml b/examples/replication/peer_cloning/Cargo.toml similarity index 79% rename from sketch/client-server/Cargo.toml rename to examples/replication/peer_cloning/Cargo.toml index b19565015..7bd72d364 100644 --- a/sketch/client-server/Cargo.toml +++ b/examples/replication/peer_cloning/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "model" +name = "peer_cloning_model" version = "0.2.0" edition = "2021" @@ -7,7 +7,7 @@ edition = "2021" [dependencies] rust-ini = "0.20.0" -swarm_nl = { path = "../../swarm_nl", features = ["async-std-runtime"] } +swarm_nl = { path = "../../../swarm_nl", features = ["async-std-runtime"] } tokio = { version = "1.37.0", features = ["full"] } # async-std = { version = "1.12", features = ["attributes"] } rand = "0.8.5" diff --git a/examples/replication/peer_cloning/bootstrap_config.ini b/examples/replication/peer_cloning/bootstrap_config.ini new file mode 100644 index 000000000..05e94fc53 --- /dev/null +++ b/examples/replication/peer_cloning/bootstrap_config.ini @@ -0,0 +1,14 @@ +[ports] +tcp=3000 +udp=4000 + +[auth] +crypto=Ed25519 +protobuf_keypair=[8, 1, 18, 64, 211, 172, 68, 234, 95, 121, 188, 130, 107, 113, 212, 215, 211, 189, 219, 190, 137, 91, 250, 222, 34, 152, 190, 117, 139, 199, 250, 5, 33, 65, 14, 180, 214, 5, 151, 109, 184, 106, 73, 186, 126, 52, 59, 220, 170, 158, 195, 249, 110, 74, 222, 161, 88, 194, 187, 112, 95, 131, 113, 251, 106, 94, 61, 177] +Crypto=Ed25519 + +[bootstrap] +boot_nodes=[12D3KooWGfbL6ZNGWqS11MoptH2A7DB1DG6u85FhXBUPXPVkVVRq:/ip4/x.x.x.x/tcp/1509, QmcZf59bWwK5XFi76CZX8cbJ4BhTzzA3gU1ZjYZcYW3dwt:/ip4/x.x.x.x/tcp/1509] + +[blacklist] +blacklist=[12D3KooWGfbL6ZNGWqS11MoptH2A7DB1DG6u85FhXBUPXPVkVVRq, 
QmcZf59bWwK5XFi76CZX8cbJ4BhTzzA3gU1ZjYZcYW3dwt] diff --git a/examples/replication/peer_cloning/src/main.rs b/examples/replication/peer_cloning/src/main.rs new file mode 100644 index 000000000..0d93d1b48 --- /dev/null +++ b/examples/replication/peer_cloning/src/main.rs @@ -0,0 +1,329 @@ +//! Copyright 2024 Algorealm, Inc. + +//! This example demonstrates the complete replication of a node's buffer data (cloning). +//! Particularly useful when a strong synchronization model is in effect, hence bringing the new +//! node up to speed quickly. The data is stored directly into the primary public buffer for the +//! application layer's consumption instead of the transient buffer. + +use std::{ + collections::HashMap, + io::{self, Write}, + time::Duration, +}; + +use swarm_nl::{ + core::{ + gossipsub_cfg::GossipsubConfig, + replication::{ConsensusModel, ConsistencyModel, ReplNetworkConfig}, + Core, CoreBuilder, NetworkEvent, RpcConfig, + }, + setup::BootstrapConfig, + Keypair, MessageId, MultiaddrString, PeerId, PeerIdString, Port, +}; + +/// The constant that represents the id of the replica network. 
Should be kept as a secret +pub const REPL_NETWORK_ID: &'static str = "replica_xx"; +/// The time to wait for events, if necessary +pub const WAIT_TIME: u64 = 2; + +/// Handle incoming RPC +fn rpc_incoming_message_handler(data: Vec>) -> Vec> { + // Just return incomding data + data +} + +/// Handle gissiping +fn gossipsub_filter_fn( + propagation_source: PeerId, + message_id: MessageId, + source: Option, + topic: String, + data: Vec, +) -> bool { + true +} + +// Create a determininstic node +async fn setup_node( + ports: (Port, Port), + deterministic_protobuf: &[u8], + boot_nodes: HashMap, +) -> Core { + // Configure the node deterministically so we can connect to it + let mut protobuf = &mut deterministic_protobuf.to_owned()[..]; + + let config = BootstrapConfig::default() + .generate_keypair_from_protobuf("ed25519", &mut protobuf) + .with_tcp(ports.0) + .with_udp(ports.1) + // configure bootnodes, so we can connect to our sister nodes + .with_bootnodes(boot_nodes); + + // Set up network + let mut builder = CoreBuilder::with_config(config); + + // Configure RPC handling + builder = builder.with_rpc(RpcConfig::Default, rpc_incoming_message_handler); + + // Configure gossipsub + // Specify the gossip filter algorithm + let filter_fn = gossipsub_filter_fn; + let builder = builder.with_gossipsub(GossipsubConfig::Default, filter_fn); + + // Configure node for replication, we will be using a strong consistency model here + let repl_config = ReplNetworkConfig::Custom { + queue_length: 150, + expiry_time: Some(10), + sync_wait_time: 5, + consistency_model: ConsistencyModel::Strong(ConsensusModel::All), + data_aging_period: 2, + }; + + builder.with_replication(repl_config).build().await.unwrap() +} + +// #[cfg(feature = "first-node")] +async fn run_node( + name: &str, + ports_1: (Port, Port), + ports_2: (Port, Port), + ports_3: (Port, Port), + peer_ids: (PeerId, PeerId), + keypair: [u8; 68], +) { + // Bootnodes + let mut bootnodes = HashMap::new(); + bootnodes.insert( + 
peer_ids.0.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_2.0), + ); + + bootnodes.insert( + peer_ids.1.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_3.0), + ); + + // Setup node + let mut node = setup_node(ports_1, &keypair[..], bootnodes).await; + + // Join replica network + println!("Joining replication network"); + if let Ok(_) = node.join_repl_network(REPL_NETWORK_ID.into()).await { + println!("Replica network successfully joined"); + } else { + panic!("Failed to join replica network"); + } + + // Wait a little for setup and connections + tokio::time::sleep(Duration::from_secs(WAIT_TIME)).await; + + // Read events generated at setup + while let Some(event) = node.next_event().await { + match event { + NetworkEvent::NewListenAddr { + local_peer_id, + listener_id: _, + address, + } => { + // Announce interfaces we're listening on + println!("Peer id: {}", local_peer_id); + println!("We're listening on {}", address); + }, + NetworkEvent::ConnectionEstablished { + peer_id, + connection_id: _, + endpoint: _, + num_established: _, + established_in: _, + } => { + println!("Connection established with peer: {:?}", peer_id); + }, + _ => {}, + } + } + + // Spin up a task to listen for replication events + let new_node = node.clone(); + tokio::task::spawn(async move { + let mut node = new_node.clone(); + loop { + // Check for incoming data events + if let Some(event) = node.next_event().await { + // Check for only incoming repl data + if let NetworkEvent::ReplicaDataIncoming { source, .. 
} = event { + println!("Recieved incoming replica data from {}", source.to_base58()); + } + } + + // Sleep + tokio::time::sleep(Duration::from_secs(WAIT_TIME)).await; + } + }); + + // Wait for some time for replication protocol intitialization across the network + tokio::time::sleep(Duration::from_secs(WAIT_TIME + 3)).await; + + println!("\n==================="); + println!("Replication Example Menu"); + println!("Usage:"); + println!("repl - Replicate to peers"); + println!("read - Read content from buffer"); + println!("clone - Clone a node and replicate it's buffer"); + println!("exit - Exit the application"); + loop { + // Read user input + let mut input = String::new(); + print!("> "); + + io::stdout().flush().unwrap(); // Flush stdout to display prompt + io::stdin().read_line(&mut input).unwrap(); + + // Trim input and split into parts + let mut parts = input.trim().split_whitespace(); + let command = parts.next(); // Get the first word + let data = parts.collect::>().join(" "); // Collect the rest as data + + // Match the first word and take action + match command { + Some("repl") => { + if !data.is_empty() { + println!("Replicating data: {}", data); + // Replicate input + match node.replicate(vec![data.into()], REPL_NETWORK_ID).await { + Ok(_) => println!("Replication successful"), + Err(e) => println!("Replication failed: {}", e.to_string()), + } + } else { + println!("Error: No data provided to replicate."); + } + }, + Some("clone") => { + if !data.is_empty() { + if let Some(peer_str) = data.split(" ").next() { + match peer_str.parse::() { + Ok(peer_id) => { + // Clone + println!("Attempting to clone replica peer: {}", peer_id); + match node.replicate_buffer(REPL_NETWORK_ID.into(), peer_id).await { + Ok(_) => println!("Replication successful: {}", peer_id), + Err(e) => { + println!("Error: Failed to clone peer {}: {:?}", peer_id, e) + }, + } + }, + Err(_) => { + println!("Error: Invalid PeerId provided. 
Please ensure the ID is in the correct format."); + }, + } + } else { + println!("Error: Could not parse peer ID. Please provide a valid ID."); + } + } else { + println!("Error: No data provided to replicate. Usage: clone "); + } + }, + Some("read") => { + println!("Reading contents from buffer..."); + while let Some(repl_data) = node.consume_repl_data(REPL_NETWORK_ID).await { + println!("Buffer Data: {}", repl_data.data[0],); + } + }, + Some("exit") => { + println!("Exiting the application. Goodbye!"); + break; + }, + Some(unknown) => println!("Unknown command: '{}'. Please try again.", unknown), + None => println!("No command entered. Please try again."), + } + } +} + +#[tokio::main] +async fn main() { + // Node 1 keypair + let node_1_keypair: [u8; 68] = [ + 8, 1, 18, 64, 34, 116, 25, 74, 122, 174, 130, 2, 98, 221, 17, 247, 176, 102, 205, 3, 27, + 202, 193, 27, 6, 104, 216, 158, 235, 38, 141, 58, 64, 81, 157, 155, 36, 193, 50, 147, 85, + 72, 64, 174, 65, 132, 232, 78, 231, 224, 88, 38, 55, 78, 178, 65, 42, 97, 39, 152, 42, 164, + 148, 159, 36, 170, 109, 178, + ]; + + // Node 2 Keypair + let node_2_keypair: [u8; 68] = [ + 8, 1, 18, 64, 37, 37, 86, 103, 79, 48, 103, 83, 170, 172, 131, 160, 15, 138, 237, 128, 114, + 144, 239, 7, 37, 6, 217, 25, 202, 210, 55, 89, 55, 93, 0, 153, 82, 226, 1, 54, 240, 36, + 110, 110, 173, 119, 143, 79, 44, 82, 126, 121, 247, 154, 252, 215, 43, 21, 101, 109, 235, + 10, 127, 128, 52, 52, 68, 31, + ]; + + // Node 3 Keypair + let node_3_keypair: [u8; 68] = [ + 8, 1, 18, 64, 211, 172, 68, 234, 95, 121, 188, 130, 107, 113, 212, 215, 211, 189, 219, 190, + 137, 91, 250, 222, 34, 152, 190, 117, 139, 199, 250, 5, 33, 65, 14, 180, 214, 5, 151, 109, + 184, 106, 73, 186, 126, 52, 59, 220, 170, 158, 195, 249, 110, 74, 222, 161, 88, 194, 187, + 112, 95, 131, 113, 251, 106, 94, 61, 177, + ]; + + // Get Peer Id's + let peer_id_1 = Keypair::from_protobuf_encoding(&node_1_keypair) + .unwrap() + .public() + .to_peer_id(); + + let peer_id_2 = 
Keypair::from_protobuf_encoding(&node_2_keypair) + .unwrap() + .public() + .to_peer_id(); + + let peer_id_3 = Keypair::from_protobuf_encoding(&node_3_keypair) + .unwrap() + .public() + .to_peer_id(); + + // Ports + let ports_1: (Port, Port) = (49555, 55003); + let ports_2: (Port, Port) = (49153, 55001); + let ports_3: (Port, Port) = (49154, 55002); + + // Spin up the coordinator node + #[cfg(feature = "third-node")] + { + run_node( + "Node 1", + ports_1, + ports_2, + ports_3, + (peer_id_2, peer_id_3), + node_1_keypair, + ) + .await; + } + + // Spin up second node + #[cfg(feature = "second-node")] + { + run_node( + "Node 2", + ports_2, + ports_1, + ports_3, + (peer_id_1, peer_id_3), + node_2_keypair, + ) + .await; + } + + // Spin up third node + #[cfg(feature = "first-node")] + { + run_node( + "Node 3", + ports_3, + ports_1, + ports_2, + (peer_id_1, peer_id_2), + node_3_keypair, + ) + .await; + } +} diff --git a/examples/replication/strong_consistency/Cargo.toml b/examples/replication/strong_consistency/Cargo.toml new file mode 100644 index 000000000..09a15f2c3 --- /dev/null +++ b/examples/replication/strong_consistency/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "strong_consistency_model" +version = "0.2.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +rust-ini = "0.20.0" +swarm_nl = { path = "../../../swarm_nl", features = ["tokio-runtime"] } +tokio = { version = "1.37.0", features = ["full"] } +# async-std = { version = "1.12", features = ["attributes"] } +rand = "0.8.5" +warp = "0.3.7" +reqwest = "0.12.8" + +[features] +client = [] +first-node = [] +second-node = [] +third-node = [] \ No newline at end of file diff --git a/examples/replication/strong_consistency/bootstrap_config.ini b/examples/replication/strong_consistency/bootstrap_config.ini new file mode 100644 index 000000000..05e94fc53 --- /dev/null +++ 
b/examples/replication/strong_consistency/bootstrap_config.ini @@ -0,0 +1,14 @@ +[ports] +tcp=3000 +udp=4000 + +[auth] +crypto=Ed25519 +protobuf_keypair=[8, 1, 18, 64, 211, 172, 68, 234, 95, 121, 188, 130, 107, 113, 212, 215, 211, 189, 219, 190, 137, 91, 250, 222, 34, 152, 190, 117, 139, 199, 250, 5, 33, 65, 14, 180, 214, 5, 151, 109, 184, 106, 73, 186, 126, 52, 59, 220, 170, 158, 195, 249, 110, 74, 222, 161, 88, 194, 187, 112, 95, 131, 113, 251, 106, 94, 61, 177] +Crypto=Ed25519 + +[bootstrap] +boot_nodes=[12D3KooWGfbL6ZNGWqS11MoptH2A7DB1DG6u85FhXBUPXPVkVVRq:/ip4/x.x.x.x/tcp/1509, QmcZf59bWwK5XFi76CZX8cbJ4BhTzzA3gU1ZjYZcYW3dwt:/ip4/x.x.x.x/tcp/1509] + +[blacklist] +blacklist=[12D3KooWGfbL6ZNGWqS11MoptH2A7DB1DG6u85FhXBUPXPVkVVRq, QmcZf59bWwK5XFi76CZX8cbJ4BhTzzA3gU1ZjYZcYW3dwt] diff --git a/examples/replication/strong_consistency/src/main.rs b/examples/replication/strong_consistency/src/main.rs new file mode 100644 index 000000000..69ff8688a --- /dev/null +++ b/examples/replication/strong_consistency/src/main.rs @@ -0,0 +1,287 @@ +//! Copyright 2024 Algorealm, Inc. + +//! This example demonstrates the replication of data accross nodes in a network using the +//! strong data consistency synchronization model. Here we are spinning up three replica nodes that accept data +//! from standard input and then immedately replicates the data across the replica network. + +use std::{collections::HashMap, io, time::Duration}; + +use swarm_nl::{ + core::{ + gossipsub_cfg::GossipsubConfig, + replication::{ConsensusModel, ConsistencyModel, ReplConfigData, ReplNetworkConfig}, + Core, CoreBuilder, NetworkEvent, RpcConfig, + }, + setup::BootstrapConfig, + Keypair, MessageId, MultiaddrString, PeerId, PeerIdString, Port, +}; + +/// The constant that represents the id of the replica network. 
Should be kept as a secret +pub const REPL_NETWORK_ID: &'static str = "replica_xx"; +/// The time to wait for events, if necessary +pub const WAIT_TIME: u64 = 2; + +/// Handle incoming RPC +fn rpc_incoming_message_handler(data: Vec>) -> Vec> { + // Just return incomding data + data +} + +/// Handle gissiping +fn gossipsub_filter_fn( + propagation_source: PeerId, + message_id: MessageId, + source: Option, + topic: String, + data: Vec, +) -> bool { + true +} + +// Create a determininstic node +async fn setup_node( + ports: (Port, Port), + deterministic_protobuf: &[u8], + boot_nodes: HashMap, +) -> Core { + // Configure the node deterministically so we can connect to it + let mut protobuf = &mut deterministic_protobuf.to_owned()[..]; + + let config = BootstrapConfig::default() + .generate_keypair_from_protobuf("ed25519", &mut protobuf) + .with_tcp(ports.0) + .with_udp(ports.1) + // configure bootnodes, so we can connect to our sister nodes + .with_bootnodes(boot_nodes); + + // Set up network + let mut builder = CoreBuilder::with_config(config); + + // Configure RPC handling + builder = builder.with_rpc(RpcConfig::Default, rpc_incoming_message_handler); + + // Configure gossipsub + // Specify the gossip filter algorithm + let filter_fn = gossipsub_filter_fn; + let builder = builder.with_gossipsub(GossipsubConfig::Default, filter_fn); + + // Configure node for replication, we will be using a strong consistency model here + let repl_config = ReplNetworkConfig::Custom { + queue_length: 150, + expiry_time: Some(10), + sync_wait_time: 5, + consistency_model: ConsistencyModel::Strong(ConsensusModel::All), + data_aging_period: 2, + }; + + builder.with_replication(repl_config).build().await.unwrap() +} + +// #[cfg(feature = "first-node")] +async fn run_node( + name: &str, + ports_1: (Port, Port), + ports_2: (Port, Port), + ports_3: (Port, Port), + peer_ids: (PeerId, PeerId), + keypair: [u8; 68], +) { + // Bootnodes + let mut bootnodes = HashMap::new(); + bootnodes.insert( + 
peer_ids.0.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_2.0), + ); + + bootnodes.insert( + peer_ids.1.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_3.0), + ); + + // Setup node 1 and try to connect to node 2 and 3 + let mut node = setup_node(ports_1, &keypair[..], bootnodes).await; + + // Join replica network + println!("Joining replication network"); + if let Ok(_) = node.join_repl_network(REPL_NETWORK_ID.into()).await { + println!("Replica network successfully joined"); + } else { + panic!("Failed to join replica network"); + } + + // Wait a little for setup and connections + tokio::time::sleep(Duration::from_secs(WAIT_TIME)).await; + + // Read events generated at setup + while let Some(event) = node.next_event().await { + match event { + NetworkEvent::NewListenAddr { + local_peer_id, + listener_id: _, + address, + } => { + // Announce interfaces we're listening on + println!("Peer id: {}", local_peer_id); + println!("We're listening on {}", address); + }, + NetworkEvent::ConnectionEstablished { + peer_id, + connection_id: _, + endpoint: _, + num_established: _, + established_in: _, + } => { + println!("Connection established with peer: {:?}", peer_id); + }, + _ => {}, + } + } + + // Spin up a task to listen for replication events + let new_node = node.clone(); + tokio::task::spawn(async move { + let mut node = new_node.clone(); + loop { + // Check for incoming data events + if let Some(event) = node.next_event().await { + // Check for only incoming repl data + if let NetworkEvent::ReplicaDataIncoming { source, .. } = event { + println!("Recieved incoming replica data from {}", source.to_base58()); + } + } + + // Try to read the data from the buffer. 
Since we are using a strong + // consistency model, we will not be able to read anything unless the + // confirmations are complete + if let Some(repl_data) = node.consume_repl_data(REPL_NETWORK_ID).await { + println!( + "Data gotten from replica: {} ({} confirmations)", + repl_data.data[0], + repl_data.confirmations.unwrap() + ); + } + + // Sleep + tokio::time::sleep(Duration::from_secs(WAIT_TIME)).await; + } + }); + + // Wait for some time for replication protocol intitialization across the network + tokio::time::sleep(Duration::from_secs(WAIT_TIME + 3)).await; + + // Read input from standard input and then replicate it to peers + let stdin = io::stdin(); + loop { + print!("Enter some input: "); + io::Write::flush(&mut io::stdout()).unwrap(); // Ensure the prompt is displayed immediately + + let mut input = String::new(); + stdin.read_line(&mut input).unwrap(); + let trimmed_input = input.trim(); + + if trimmed_input == "exit" { + break; + } + + println!("Replicating..."); + + // Replicate input + match node + .replicate(vec![trimmed_input.into()], REPL_NETWORK_ID) + .await + { + Ok(_) => println!("Replication successful"), + Err(e) => println!("Replication failed: {}", e.to_string()), + } + } +} + +#[tokio::main] +async fn main() { + // Node 1 keypair + let node_1_keypair: [u8; 68] = [ + 8, 1, 18, 64, 34, 116, 25, 74, 122, 174, 130, 2, 98, 221, 17, 247, 176, 102, 205, 3, 27, + 202, 193, 27, 6, 104, 216, 158, 235, 38, 141, 58, 64, 81, 157, 155, 36, 193, 50, 147, 85, + 72, 64, 174, 65, 132, 232, 78, 231, 224, 88, 38, 55, 78, 178, 65, 42, 97, 39, 152, 42, 164, + 148, 159, 36, 170, 109, 178, + ]; + + // Node 2 Keypair + let node_2_keypair: [u8; 68] = [ + 8, 1, 18, 64, 37, 37, 86, 103, 79, 48, 103, 83, 170, 172, 131, 160, 15, 138, 237, 128, 114, + 144, 239, 7, 37, 6, 217, 25, 202, 210, 55, 89, 55, 93, 0, 153, 82, 226, 1, 54, 240, 36, + 110, 110, 173, 119, 143, 79, 44, 82, 126, 121, 247, 154, 252, 215, 43, 21, 101, 109, 235, + 10, 127, 128, 52, 52, 68, 31, + ]; + + // 
Node 3 Keypair + let node_3_keypair: [u8; 68] = [ + 8, 1, 18, 64, 211, 172, 68, 234, 95, 121, 188, 130, 107, 113, 212, 215, 211, 189, 219, 190, + 137, 91, 250, 222, 34, 152, 190, 117, 139, 199, 250, 5, 33, 65, 14, 180, 214, 5, 151, 109, + 184, 106, 73, 186, 126, 52, 59, 220, 170, 158, 195, 249, 110, 74, 222, 161, 88, 194, 187, + 112, 95, 131, 113, 251, 106, 94, 61, 177, + ]; + + // Get Peer Id's + let peer_id_1 = Keypair::from_protobuf_encoding(&node_1_keypair) + .unwrap() + .public() + .to_peer_id(); + + let peer_id_2 = Keypair::from_protobuf_encoding(&node_2_keypair) + .unwrap() + .public() + .to_peer_id(); + + let peer_id_3 = Keypair::from_protobuf_encoding(&node_3_keypair) + .unwrap() + .public() + .to_peer_id(); + + // Ports + let ports_1: (Port, Port) = (49555, 55003); + let ports_2: (Port, Port) = (49153, 55001); + let ports_3: (Port, Port) = (49154, 55002); + + // Spin up the coordinator node + #[cfg(feature = "third-node")] + { + run_node( + "Node 1", + ports_1, + ports_2, + ports_3, + (peer_id_2, peer_id_3), + node_1_keypair, + ) + .await; + } + + // Spin up second node + #[cfg(feature = "second-node")] + { + run_node( + "Node 2", + ports_2, + ports_1, + ports_3, + (peer_id_1, peer_id_3), + node_2_keypair, + ) + .await; + } + + // Spin up third node + #[cfg(feature = "first-node")] + { + run_node( + "Node 3", + ports_3, + ports_1, + ports_2, + (peer_id_1, peer_id_2), + node_3_keypair, + ) + .await; + } +} diff --git a/tutorials/simple_game/Cargo.toml b/examples/simple_game/Cargo.toml similarity index 100% rename from tutorials/simple_game/Cargo.toml rename to examples/simple_game/Cargo.toml diff --git a/tutorials/simple_game/Dockerfile b/examples/simple_game/Dockerfile similarity index 100% rename from tutorials/simple_game/Dockerfile rename to examples/simple_game/Dockerfile diff --git a/tutorials/simple_game/README.md b/examples/simple_game/README.md similarity index 100% rename from tutorials/simple_game/README.md rename to 
examples/simple_game/README.md diff --git a/tutorials/simple_game/run_both_nodes.sh b/examples/simple_game/run_both_nodes.sh similarity index 100% rename from tutorials/simple_game/run_both_nodes.sh rename to examples/simple_game/run_both_nodes.sh diff --git a/tutorials/simple_game/src/main.rs b/examples/simple_game/src/main.rs similarity index 100% rename from tutorials/simple_game/src/main.rs rename to examples/simple_game/src/main.rs diff --git a/research.md b/research.md new file mode 100644 index 000000000..e106bd71c --- /dev/null +++ b/research.md @@ -0,0 +1,87 @@ +# **SwarmNL: A Library to Build Custom Networking Layers for Decentralized Applications** + +SwarmNL addresses two critical concerns in distributed systems: **Scaling** and **Fault Tolerance**. This section focuses on how SwarmNL handles **Fault Tolerance** using redundancy. + +## **Fault Tolerance** + +Fault tolerance in SwarmNL is primarily achieved through **redundancy**, which ensures that other nodes in the network remain operational to service incoming requests, even in the event of failures. SwarmNL handles redundancy using one key technique: **Replication**. + +### **Replication** + +SwarmNL facilitates seamless data replication among configured nodes in the network. This replication is governed by a configurable **consistency model**, which ensures synchronized data across nodes. SwarmNL supports two consistency models: + +--- + +### **1. Strong Consistency** + +In the **Strong Consistency** model, replicated data is temporarily stored in a buffer and is only committed to the public buffer after ensuring synchronization across all nodes. The process involves the following steps: + +1. **Receiving Data**: + + - When replicated data arrives at a node, it includes a flag (`confirmation count`) initialized to `1`, indicating the originating node already has the data. + - This data is stored in the **temporary buffer** of the receiving node. + +2. 
**Broadcasting Data**: + + - The receiving node immediately broadcasts the data to its replica peers. + - Each peer increments the `confirmation count` upon receiving the broadcast. + +3. **Confirming Consistency**: + - When the `confirmation count` reaches `node_count - 1` (e.g., 2 for a 3-node network), the data is deemed consistent. + - The data is then moved from the temporary buffer to the primary (public) buffer, making it accessible to the application layer. + +This model guarantees that data is fully synchronized across all replicas before it becomes available to the application layer. + +--- + +### **2. Eventual Consistency** + +In the **Eventual Consistency** model, replicated data is immediately stored in the **public buffer**. Consistency is achieved over time through a periodic synchronization task. The process works as follows: + +1. **Buffer Queue**: + + - The public buffer uses a `BTreeSet` to organize replicated data based on a **Lamport clock**. + +2. **Synchronization Task**: + + - A background task periodically broadcasts the `MessageId`s of data in the queue to all replica peers. + - Peers compare the received `MessageId`s with their local buffer to identify missing data. + +3. **Retrieving Missing Data**: + - Peers send an RPC request to retrieve missing data and add it to their buffers. + - The system trusts that, over time, all nodes will achieve eventual consistency as data propagates and synchronizes across the network. + +#### **Buffer Aging and Eviction** + +- The buffer has a **maximum size** and supports an **aging mechanism**: + - Each data item has an associated lifespan (`max age`). + - If the buffer is full, items exceeding their lifespan are lazily removed during the next data insertion. + - This ensures data remains accessible for sufficient time while optimizing buffer space. 
+ +In the eventual consistency model, the application layer operates with the expectation that data becomes consistent over time, even if it has been consumed from the buffer. + +## **Scaling** + +Scaling the network is primarily achieved through **replication** and **sharding**. Replication has already been discussed in the context of fault tolerance. Scaling enables improved read and write performance by partitioning the network into logical `shards`, each responsible for a subset of the data. A `shard` may span multiple nodes, and each shard manages its own data storage and operations. + +### **Sharding in SwarmNL** + +SwarmNL provides a trait called `Sharding` to implement sharding. To maintain flexibility and configurability, developers are required to implement the `locate_shard()` function within the trait. This function maps a key or data item to a logical shard, allowing developers to define sharding strategies tailored to their application's needs. + +The `Sharding` trait also includes generic functions for: + +- Adding nodes to a shard. +- Joining or exiting a shard. +- Fetching data over the network. +- Storing data in the appropriate shard. +- **Data forwarding**, explained below. + +### **Data Forwarding** + +Data forwarding occurs when a node receives data it is not configured to store or process due to the shard's configuration. In such cases, the node identifies the appropriate shard and forwards the data to the corresponding nodes within that shard. + +### **Shards and Replication** + +All nodes within a shard act as replicas of each other and synchronize their data based on the consistency model configured during replication setup. This tight integration between sharding and replication ensures that the data within each shard is reliable and consistent, as defined by the application's requirements. 
+ +By combining replication and sharding, SwarmNL offers a scalable and fault-tolerant framework for managing decentralized networks while giving developers the freedom to design shard configurations that align with their use case. diff --git a/sketch/client-server/src/main.rs b/sketch/client-server/src/main.rs deleted file mode 100644 index 9eb954fa1..000000000 --- a/sketch/client-server/src/main.rs +++ /dev/null @@ -1,564 +0,0 @@ -//! Copyright 2024 Algorealm, Inc. - -//! This example models a distributed network and is a rough sketch to help in determining concrete -//! features to be built - -use std::{ - collections::{HashMap, VecDeque}, - sync::Arc, - time::Duration, -}; - -use rand::random; -use swarm_nl::{ - core::{ - gossipsub_cfg::GossipsubConfig, AppData, AppResponse, Core, CoreBuilder, NetworkEvent, - RpcConfig, - }, - setup::BootstrapConfig, - Keypair, MessageId, MultiaddrString, PeerId, PeerIdString, Port, -}; -use tokio::sync::Mutex; -use util::sleep_for; - -/// The id of the gossip mesh network -pub const GOSSIP_NETWORK: &str = "random"; - -/// Amount of time to wait for proper sync -pub const WAIT_TIME: u64 = 3; - -/// Basic node information -pub struct NodeInfo { - /// Node identifier, - name: String, - /// Broadcast state, used to make sure we've recieved from both our peers - state: HashMap, - /// Nonce - nonce: Vec, -} - -/// Handle incoming RPC -fn rpc_incoming_message_handler(data: Vec>) -> Vec> { - // Geenerate and return random number to peer - let random_number = util::generate_random_number(); - - vec![vec![random_number]] -} - -/// Handle gissiping -fn gossipsub_filter_fn( - propagation_source: PeerId, - message_id: MessageId, - source: Option, - topic: String, - data: Vec, -) -> bool { - true -} - -/// Create a detereministic node -async fn setup_node( - ports: (Port, Port), - deterministic_protobuf: &[u8], - boot_nodes: HashMap, -) -> Core { - // Configure the node deterministically so we can connect to it - let mut protobuf = &mut 
deterministic_protobuf.to_owned()[..]; - - let config = BootstrapConfig::default() - .generate_keypair_from_protobuf("ed25519", &mut protobuf) - .with_tcp(ports.0) - .with_udp(ports.1) - // configure bootnodes, so we can connect to our sister nodes - .with_bootnodes(boot_nodes); - - // Set up network - let mut builder = CoreBuilder::with_config(config); - - // Configure RPC handling - builder = builder.with_rpc(RpcConfig::Default, rpc_incoming_message_handler); - - // Configure gossipsub - // Specify the gossip filter algorithm - let filter_fn = gossipsub_filter_fn; - let builder = builder.with_gossipsub(GossipsubConfig::Default, filter_fn); - - // Finish build - builder.build().await.unwrap() -} - -/// Consume buffer and perform operations on the data retrieved -async fn consume_buffer( - mut node: Core, - name: String, - storage_addr: String, - buffer: Arc>>, -) { - loop { - while let Some(data) = buffer.lock().await.pop_front() - /* just here is the critical section */ - { - // First we save it to local storage - match util::write_to_file(&storage_addr, &data) { - Ok(_) => { - println!("[{name}]>>> Incoming data successfully saved to storage"); - - // Now we gossip data recieved to peers - // Prepare message - let gossip_request = AppData::GossipsubBroadcastMessage { - topic: GOSSIP_NETWORK.to_string(), - message: vec![data.clone()], - }; - - // Send request - if let Ok(_) = node.query_network(gossip_request).await { - println!("[{name}]>>> Data sent to peers: {}", data); - } else { - println!("[{name}]>>> Could not gossip to peers"); - } - }, - Err(e) => println!("[{name}]>>> Error saving data: {e}"), - } - } - - // Sleep a while - sleep_for(4).await; - } -} - -/// Handle incoming gossip and syncronize with the network -async fn handle_incoming_gossip(name: String, mut node: Core, storage_addr: String) { - loop { - // We'll keep polling for gossip events as they come in - while let Some(event) = node.next_event().await { - match event { - // Handle incoming 
broadcast of random numbers generated by our peers - NetworkEvent::GossipsubIncomingMessageHandled { source: _, data } => { - // Extract data - let incoming_data = data[0].clone(); - - println!( - "[{name}]>>> Incoming data recieved from peer: {}", - incoming_data - ); - - // Insert it to local storage and arrange it properly - if let Ok(lines) = util::read_lines(&storage_addr) { - // First, lets get the number of idiom - let index_string = incoming_data.split('.').collect::>()[0]; - let index = index_string.parse::().unwrap(); - - let (mut before_index, mut after_index): (Vec, Vec) = lines - .filter_map(Result::ok) - .partition(|line| line.contains(&format!("{}", index - 1))); - - // Now join it to the former slice - before_index.push(incoming_data); - - // Concatenate the slices - before_index.append(&mut after_index); - - // save to file - match util::write_lines_sync(&storage_addr, &before_index[..]) { - Ok(_) => println!("[{name}]>>> Data sucessfully sychronized locally"), - Err(_) => println!("[{name}]>>> Local synchronization failed"), - }; - } - }, - _ => {}, - } - } - - // Wait a little for messages to come in - util::sleep_for(WAIT_TIME).await; - } -} - -// #[cfg(feature = "first-node")] -async fn run_node( - name: &str, - ports_1: (Port, Port), - ports_2: (Port, Port), - ports_3: (Port, Port), - peer_ids: (PeerId, PeerId), - keypair: [u8; 68], - http_port: Port, - storage_addr: String, -) { - // Bootnodes - let mut bootnodes = HashMap::new(); - bootnodes.insert( - peer_ids.0.to_base58(), - format!("/ip4/127.0.0.1/tcp/{}", ports_2.0), - ); - - bootnodes.insert( - peer_ids.1.to_base58(), - format!("/ip4/127.0.0.1/tcp/{}", ports_3.0), - ); - - // Setup node 1 and try to connect to node 2 and 3 - let mut node = setup_node(ports_1, &keypair[..], bootnodes).await; - - // Temporaqry data buffer - let buffer: VecDeque = VecDeque::new(); - let temp_buffer = Arc::new(Mutex::new(buffer)); - - // Spawn an HTTP server that writes the incoming data to the internal 
buffer - let node_name = name.to_owned(); - let buffer = temp_buffer.clone(); - tokio::task::spawn(async move { - util::setup_server(buffer, node_name, http_port).await; - }); - - // Spin up a task to consume buffer as data comes in - let node_name = name.to_owned(); - let node_copy = node.clone(); - let storage_address = storage_addr.clone(); - tokio::task::spawn(consume_buffer( - node_copy, - node_name, - storage_address, - temp_buffer, - )); - - // Wait a little for setup and connections - tokio::time::sleep(Duration::from_secs(WAIT_TIME)).await; - - println!("{:?}", node.next_event().await); - - // Read events generated at setup - while let Some(event) = node.next_event().await { - match event { - NetworkEvent::NewListenAddr { - local_peer_id, - listener_id: _, - address, - } => { - // Announce interfaces we're listening on - println!("Peer id: {}", local_peer_id); - println!("We're listening on {}", address); - }, - NetworkEvent::ConnectionEstablished { - peer_id, - connection_id: _, - endpoint: _, - num_established: _, - established_in: _, - } => { - println!("Connection established with peer: {:?}", peer_id); - }, - _ => {}, - } - } - - // Join a common network (subscribe to a topic) - let join_request = AppData::GossipsubJoinNetwork(GOSSIP_NETWORK.to_string()); - - // Send request to network - let stream_id = node.send_to_network(join_request).await.unwrap(); - if let Ok(_) = node.recv_from_network(stream_id).await { - println!("[{name}]>>> Setup and ready to roll!"); - - // Spin up task to monitor the network events - let node_copy = node.clone(); - let addr = storage_addr.clone(); - let node_name = name.to_owned(); - tokio::task::spawn(handle_incoming_gossip(node_name, node_copy, addr)); - - // Keep alive - loop {} - } else { - panic!("[{name}]>>> Could not join mesh network"); - } -} - -#[tokio::main] -async fn main() { - // Node 1 keypair - let node_1_keypair: [u8; 68] = [ - 8, 1, 18, 64, 34, 116, 25, 74, 122, 174, 130, 2, 98, 221, 17, 247, 176, 102, 
205, 3, 27, - 202, 193, 27, 6, 104, 216, 158, 235, 38, 141, 58, 64, 81, 157, 155, 36, 193, 50, 147, 85, - 72, 64, 174, 65, 132, 232, 78, 231, 224, 88, 38, 55, 78, 178, 65, 42, 97, 39, 152, 42, 164, - 148, 159, 36, 170, 109, 178, - ]; - - // Node 2 Keypair - let node_2_keypair: [u8; 68] = [ - 8, 1, 18, 64, 37, 37, 86, 103, 79, 48, 103, 83, 170, 172, 131, 160, 15, 138, 237, 128, 114, - 144, 239, 7, 37, 6, 217, 25, 202, 210, 55, 89, 55, 93, 0, 153, 82, 226, 1, 54, 240, 36, - 110, 110, 173, 119, 143, 79, 44, 82, 126, 121, 247, 154, 252, 215, 43, 21, 101, 109, 235, - 10, 127, 128, 52, 52, 68, 31, - ]; - - // Node 3 Keypair - let node_3_keypair: [u8; 68] = [ - 8, 1, 18, 64, 211, 172, 68, 234, 95, 121, 188, 130, 107, 113, 212, 215, 211, 189, 219, 190, - 137, 91, 250, 222, 34, 152, 190, 117, 139, 199, 250, 5, 33, 65, 14, 180, 214, 5, 151, 109, - 184, 106, 73, 186, 126, 52, 59, 220, 170, 158, 195, 249, 110, 74, 222, 161, 88, 194, 187, - 112, 95, 131, 113, 251, 106, 94, 61, 177, - ]; - - // Get Peer Id's - let peer_id_1 = Keypair::from_protobuf_encoding(&node_1_keypair) - .unwrap() - .public() - .to_peer_id(); - - let peer_id_2 = Keypair::from_protobuf_encoding(&node_2_keypair) - .unwrap() - .public() - .to_peer_id(); - - let peer_id_3 = Keypair::from_protobuf_encoding(&node_3_keypair) - .unwrap() - .public() - .to_peer_id(); - - // Ports - let ports_1: (Port, Port) = (49152, 55003); - let ports_2: (Port, Port) = (49153, 55001); - let ports_3: (Port, Port) = (49154, 55002); - - // HTTP Ports - let http_ports = [4000, 4001, 4002]; - - // Storage addresses - let storage_addrs = ["node-1/file.txt", "node-2/file.txt", "node-3/file.txt"]; - - // Spin up first server - #[cfg(feature = "first-node")] - { - run_node( - "Node 1", - ports_1, - ports_2, - ports_3, - (peer_id_2, peer_id_3), - node_1_keypair, - http_ports[0], - storage_addrs[0].to_string(), - ) - .await; - } - - // Spin up second server - #[cfg(feature = "second-node")] - { - run_node( - "Node 2", - ports_2, - ports_1, - 
ports_3, - (peer_id_1, peer_id_3), - node_2_keypair, - http_ports[1], - storage_addrs[1].to_string(), - ) - .await; - } - - // Spin up third server - #[cfg(feature = "third-node")] - { - run_node( - "Node 3", - ports_3, - ports_1, - ports_2, - (peer_id_1, peer_id_2), - node_3_keypair, - http_ports[2], - storage_addrs[2].to_string(), - ) - .await; - } - - // Spin up client - #[cfg(feature = "client")] - { - // Initialize client - let file_name = "client/file.txt"; - // Run - util::run_client(file_name, http_ports).await; - } -} - -/// Module containing utility functions -mod util { - use std::{ - collections::VecDeque, - fs::{File, OpenOptions}, - io::{self, BufRead}, - io::{Result as IoResult, Write}, - path::Path, - }; - - use super::*; - use rand::Rng; - use reqwest::Client; - use warp::{ - hyper::{body::Bytes, StatusCode}, - Filter, - }; - - /// Generate random number - pub fn generate_random_number() -> u8 { - let mut rng = rand::thread_rng(); // Initialize the random number generator - rng.gen_range(1..=100) // Generate a number between 1 and 100 inclusive - } - - /// Sleep for a specified duration - pub async fn sleep_for(duration: u64) { - tokio::time::sleep(Duration::from_secs(duration)).await; - } - - /// Setup a HTTP server to recieve data from outside the network - pub async fn setup_server( - temp_buffer: Arc>>, - node_name: String, - http_port: Port, - ) { - // Allow CORS - let cors = warp::cors() - .allow_origin("http://127.0.0.1") - .allow_methods(vec!["GET", "POST", "DELETE"]); - - // Define a POST route that accepts binary data - let upload_route = warp::post() - .and(warp::path("upload")) - .and(warp::body::bytes()) // Accept raw bytes as the body - .and_then(move |bytes: Bytes| { - let node_name = node_name.clone(); - let temp_buffer = temp_buffer.clone(); - - async move { - // Convert bytes to string - let data = String::from_utf8(bytes.to_vec()).unwrap(); - - // Print the received string data - println!("[{}]>>> Received data of length: {}", 
node_name, data.len()); - - // Save to internal buffer - temp_buffer.lock().await.push_back(data); - - // Return a success response - Ok::<_, warp::Rejection>(warp::reply::with_status( - "String data received", - StatusCode::OK, - )) - } - }) - .with(cors); - - println!("Setting up HTTP server, listening on port {http_port}"); - - // Start the server on specified port - warp::serve(upload_route) - .run(([127, 0, 0, 1], http_port)) - .await; - } - - /// Write text to a file - pub fn write_to_file(filename: &str, text: &str) -> IoResult<()> { - // Open the file in append mode, create if it doesn't exist - let mut file = OpenOptions::new() - .write(true) - .append(true) - .create(true) - .open(filename)?; - - // Write the text into the file - writeln!(file, "{}", text)?; - - Ok(()) - } - - /// Setup a client that would send requests to the network - pub async fn run_client(file_path: &str, ports: [Port; 3]) { - let file_path = file_path.to_owned(); - - // Spin up a task that consistently writes data to the network - let handle = tokio::task::spawn(async move { - // Initialize the reqwest client - let client = Client::new(); - let mut i = 0; - - loop { - if let Ok(lines) = read_lines(&file_path) { - // Read the file line by line - for line in lines { - if let Ok(content) = line { - // Send the line content to the server - match send_to_server(&client, &content, ports[i]).await { - Ok(response) => println!("Server responded: {}", response), - Err(e) => eprintln!("Error sending data: {:?}", e), - } - - // Simulate some delay to avoid overwhelming the server - sleep_for(2).await; - } - - // Modify the counter to balance load on the servers - i = (i + 1) % 3; - } - } - // After finishing the file, start over again - println!("Finished reading the file. 
Starting over..."); - } - }); - - // Await the task to ensure it runs to completion - if let Err(e) = handle.await { - eprintln!("Task failed with error: {:?}", e); - } - } - - // Send data to the server - async fn send_to_server( - client: &Client, - data: &str, - port: Port, - ) -> Result { - // Format the URL using the provided port - let url = format!("http://127.0.0.1:{}/upload", port); - - // Send the POST request - let res = client - .post(&url) // Use the formatted URL - .body(data.to_string()) - .send() - .await?; - - println!("Response status: {:?}", res.status()); - - // Return the server response as text - Ok(res.text().await?) - } - - // Read lines from a file - pub fn read_lines

(filename: P) -> io::Result>> - where - P: AsRef, - { - let file = File::open(filename)?; - Ok(io::BufReader::new(file).lines()) - } - - /// Synchronous function to write lines to a file, overwriting its content - pub fn write_lines_sync(filename: &str, lines: &[String]) -> IoResult<()> { - // Open the file in write mode, truncating the file to overwrite it - let mut file = OpenOptions::new() - .write(true) - .truncate(true) // Overwrite file contents - .create(true) - .open(filename)?; - - // Write each line to the file - for line in lines { - writeln!(file, "{}", line)?; // Writes the line with a newline - } - - Ok(()) - } -} diff --git a/swarm_nl/Cargo.toml b/swarm_nl/Cargo.toml index 9013e1021..17b5af545 100644 --- a/swarm_nl/Cargo.toml +++ b/swarm_nl/Cargo.toml @@ -13,8 +13,9 @@ libp2p = { version="0.53.2", "features"=["async-std", "macros", "ping", "tokio", libp2p-identity = { version="0.2.8", "features"=["secp256k1", "ecdsa", "rsa", "ed25519"] } futures = "0.3.30" futures-time = "3.0.0" -serde = "1.0.200" +serde = { version = "1.0.2", features = ["derive"] } base58 = "0.2.0" +async-trait = "0.1.83" [dependencies.async-std] version = "1.12.0" @@ -23,11 +24,12 @@ optional = true [dependencies.tokio] version = "1.37.0" optional = true +features = ["full"] [features] tokio-runtime = ["tokio"] async-std-runtime = ["async-std"] -# default = ["async-std-runtime"] +default = ["tokio-runtime"] test-listening-node = [] test-dialing-node = [] diff --git a/swarm_nl/bootstrap_config.ini b/swarm_nl/bootstrap_config.ini index 1501e8149..3e7e55815 100644 --- a/swarm_nl/bootstrap_config.ini +++ b/swarm_nl/bootstrap_config.ini @@ -23,12 +23,3 @@ boot_nodes=[12D3KooWGfbL6ZNGWqS11MoptH2A7DB1DG6u85FhXBUPXPVkVVRq:/ip4/x.x.x.x/tc [blacklist] ; The list of blacklisted peers we don't want to have anything to do with blacklist=[12D3KooWGfbL6ZNGWqS11MoptH2A7DB1DG6u85FhXBUPXPVkVVRq, QmcZf59bWwK5XFi76CZX8cbJ4BhTzzA3gU1ZjYZcYW3dwt] - -; This section specifies configurations for 
replication of data -[repl] -; Replica nodes to broadcast replicated data to -; Format: [Replication key:[Node addresses]] -static_replica_nodes=[ - j5*%Tndsd&*fys87zhf9890mnu:[12D3KooWGfbL6ZNGWqS11MoptH2A7DB1DG6u85FhXBUPXPVkVVRq:/ip4/x.x.x.x/tcp/1509, QmcZf59bWwK5XFi76CZX8cbJ4BhTzzA3gU1ZjYZcYW3dwt:/ip4/x.x.x.x/tcp/1509]@ - jgbfd78fys87zhf9890mnu:[12D3KooWGfbL6ZNGWqS11MoptH2A7DB1DG6u85FhXBUPXPVkVVRq:/ip4/x.x.x.x/tcp/1509, QmcZf59bWwK5XFi76CZX8cbJ4BhTzzA3gU1ZjYZcYW3dwt:/ip4/x.x.x.x/tcp/1509]@ -] \ No newline at end of file diff --git a/swarm_nl/src/core/mod.rs b/swarm_nl/src/core/mod.rs index 8110e1cff..6291a7336 100644 --- a/swarm_nl/src/core/mod.rs +++ b/swarm_nl/src/core/mod.rs @@ -7,12 +7,12 @@ #![doc = include_str!("../../doc/core/ApplicationInteraction.md")] use std::{ + cmp, collections::{vec_deque::IntoIter, BTreeSet, HashMap, HashSet}, fs, net::IpAddr, num::NonZeroU32, path::Path, - rc::Rc, sync::Arc, time::Duration, }; @@ -32,15 +32,17 @@ use libp2p::{ ping::{self, Failure}, request_response::{self, cbor::Behaviour, ProtocolSupport}, swarm::{NetworkBehaviour, SwarmEvent}, - swarm::{NetworkBehaviour, SwarmEvent}, tcp, tls, yamux, Multiaddr, StreamProtocol, Swarm, SwarmBuilder, }; -use prelude::replica_cfg::ReplBufferData; +use replication::{ + ConsistencyModel, ReplBufferData, ReplConfigData, ReplInfo, ReplNetworkConfig, + ReplicaBufferQueue, +}; use self::{ gossipsub_cfg::{Blacklist, GossipsubConfig, GossipsubInfo}, ping_config::*, - replica_cfg::{ReplInfo, ReplNetworkConfig, ReplicaBufferQueue}, + sharding::{ShardingCfg, ShardingInfo}, }; use super::*; @@ -52,8 +54,10 @@ use async_std::sync::Mutex; #[cfg(feature = "tokio-runtime")] use tokio::sync::Mutex; -mod prelude; +pub(crate) mod prelude; pub use prelude::*; +pub mod replication; +pub mod sharding; mod tests; /// The Core Behaviour implemented which highlights the various protocols @@ -128,7 +132,7 @@ pub struct CoreBuilder { /// The size of the stream buffers to use to track application requests to 
the network layer /// internally. stream_size: usize, - /// The IP address to listen on. + /// The IP address to listen on.f ip_address: IpAddr, /// Connection keep-alive duration while idle. keep_alive_duration: Seconds, @@ -150,8 +154,11 @@ pub struct CoreBuilder { gossipsub::Behaviour, fn(PeerId, MessageId, Option, String, Vec) -> bool, ), - /// The network data for replication accross nodes - replication_cfg: (Rc>, ReplNetworkConfig), + /// The network data for replication operations + replication_cfg: ReplNetworkConfig, + /// The name of the entire shard network. This is important for quick broadcasting of changes + /// in the shard network as a whole. + sharding: ShardingInfo, } impl CoreBuilder { @@ -224,7 +231,15 @@ impl CoreBuilder { identify, request_response: (request_response_behaviour, rpc_handler_fn), gossipsub: (gossipsub_behaviour, gossip_filter_fn), - replication_cfg: (config.repl_cfg(), ReplNetworkConfig::Default), + replication_cfg: ReplNetworkConfig::Default, + // The default peers to be forwarded sharded data must be 25% of the total in a shard + sharding: ShardingInfo { + id: Default::default(), + config: ShardingCfg { + callback: rpc_handler_fn, + }, + state: Default::default(), + }, } } @@ -287,12 +302,22 @@ impl CoreBuilder { } } - /// Configure the 'Replication` protocol for the network. Here we accept the key for the - /// replication network and the handler to be called when the replicated data comes in + /// Configure the `Replication` protocol for the network. pub fn with_replication(mut self, repl_cfg: ReplNetworkConfig) -> Self { - // Save the event handler and map it to the replica group key - self.replication_cfg.1 = repl_cfg; - Self { ..self } + self.replication_cfg = repl_cfg; + CoreBuilder { ..self } + } + + /// Configure the `Sharding` protocol for the network. 
+ pub fn with_sharding(self, network_id: String, callback: fn(RpcData) -> RpcData) -> Self { + CoreBuilder { + sharding: ShardingInfo { + id: network_id, + config: ShardingCfg { callback }, + state: Default::default(), + }, + ..self + } } /// Configure the RPC protocol for the network. @@ -623,9 +648,8 @@ impl CoreBuilder { }; // Set up Replication information - let repl_cfg = self.replication_cfg.0; let repl_info = ReplInfo { - state: Arc::new((*repl_cfg).clone()), + state: Arc::new(Mutex::new(Default::default())), }; // Initials stream id @@ -643,6 +667,7 @@ impl CoreBuilder { rpc_handler_fn: self.request_response.1, gossip_filter_fn: self.gossipsub.1, replication: repl_info, + sharding: self.sharding.clone(), }; // Build the network core @@ -654,30 +679,27 @@ impl CoreBuilder { current_stream_id: Arc::new(Mutex::new(stream_id)), // Initialize an empty event queue event_queue: DataQueue::new(), - replica_buffer: Arc::new(ReplicaBufferQueue::new(self.replication_cfg.1)), + replica_buffer: Arc::new(ReplicaBufferQueue::new(self.replication_cfg.clone())), + network_info, }; - // Ensure there is support for replication - if !network_info.replication.state.is_empty() { - // Spin up task to handle replication to nodes + // Check if sharding is configured + if !self.sharding.id.is_empty() { + // Spin up task to init the network + let mut core = network_core.clone(); #[cfg(feature = "async-std-runtime")] - async_std::task::spawn(Core::init_replication( - network_core.clone(), - network_info.clone(), - )); + async_std::task::spawn( + async move { core.init_sharding(self.sharding.id.clone()).await }, + ); #[cfg(feature = "tokio-runtime")] - tokio::task::spawn(Core::init_replication( - network_core.clone(), - network_info.clone(), - )); + tokio::task::spawn(async move { core.init_sharding(self.sharding.id.clone()).await }); } // Spin up task to handle async operations and data on the network #[cfg(feature = "async-std-runtime")] 
async_std::task::spawn(Core::handle_async_operations( swarm, - network_info, network_sender, network_receiver, network_core.clone(), @@ -687,7 +709,6 @@ impl CoreBuilder { #[cfg(feature = "tokio-runtime")] tokio::task::spawn(Core::handle_async_operations( swarm, - network_info, network_sender, network_receiver, network_core.clone(), @@ -743,17 +764,55 @@ pub struct Core { event_queue: DataQueue, /// The internal buffer storing incoming replicated content before they are expired or consumed replica_buffer: Arc, + /// Important information about the network + network_info: NetworkInfo, } impl Core { - /// The flag used to initialize the replication protocol. When replica nodes recieve - /// an RPC with the flag as the first parameter, they then join the replication network whose - /// key is specified in the second field - pub const REPL_CFG: &'static str = "REPL_CFG_@@"; + /// The delimeter that separates the messages to gossip. + pub const GOSSIP_MESSAGE_SEPARATOR: &'static str = "~~##~~"; /// The gossip flag to indicate that incoming gossipsub message is actually data sent for - /// replication - pub const REPL_GOSSIP: &'static str = "REPL_GOSSIP_@@"; + /// replication. + pub const REPL_GOSSIP_FLAG: &'static str = "REPL_GOSSIP_FLAG__@@"; + + /// The RPC flag to indicate that incoming message is data that has been forwarded to the node + /// because it is a member of the logical shard to store the data. + pub const RPC_DATA_FORWARDING_FLAG: &'static str = "RPC_DATA_FORWARDING_FLAG__@@"; + + /// The gossip flag to indicate that incoming (or outgoing) gossipsub message is a part of the + /// strong consistency algorithm, intending to increase the confirmation count of a particular + /// data item in the replicas temporary buffer. 
+ pub const STRONG_CONSISTENCY_FLAG: &'static str = "STRONG_CON__@@"; + + /// The gossip flag to indicate that incoming (or outgoing) gossipsub message is a part of the + /// eventual consistency algorithm seeking to synchronize data across nodes. + pub const EVENTUAL_CONSISTENCY_FLAG: &'static str = "EVENTUAL_CON_@@"; + + /// The RPC flag to pull missing data from a replica node for eventual consistency + /// synchronization. + pub const RPC_SYNC_PULL_FLAG: &'static str = "RPC_SYNC_PULL_FLAG__@@"; + + /// The RPC flag to update the shard network state of a joining node. + pub const SHARD_RPC_SYNC_FLAG: &'static str = "SHARD_RPC_SYNC_FLAG__@@"; + + /// The sharding gossip flag to indicate that a node has joined a shard network. + pub const SHARD_GOSSIP_JOIN_FLAG: &'static str = "SHARD_GOSSIP_JOIN_FLAG__@@"; + + /// The sharding gossip flag to indicate that a node has exited a shard network. + pub const SHARD_GOSSIP_EXIT_FLAG: &'static str = "SHARD_GOSSIP_EXIT_FLAG__@@"; + + /// The RPC flag to request a data from a node in a logical shard. + pub const SHARD_RPC_REQUEST_FLAG: &'static str = "SHARD_RPC_REQUEST_FLAG__@@"; + + /// The delimeter between the data fields of an entry in a dataset requested by a replica peer. + pub const FIELD_DELIMITER: &'static str = "_@_"; + + /// The delimeter between the data entries that has been requested by a replica peer. + pub const ENTRY_DELIMITER: &'static str = "@@@"; + + /// The delimeter to separate messages during RPC data marshalling + pub const DATA_DELIMITER: &'static str = "$$"; /// Serialize keypair to protobuf format and write to config file on disk. This could be useful /// for saving a keypair for future use when going offline. @@ -806,6 +865,24 @@ impl Core { self.event_queue.pop().await } + /// Return the number of replica peers in a network, with the node exclusive. 
+ pub async fn replica_peers(&mut self, replica_network: &str) -> Vec { + let mut peers = Vec::new(); + + // Check gossip group + let request = AppData::GossipsubGetInfo; + if let Ok(response) = self.query_network(request).await { + if let AppResponse::GossipsubGetInfo { mesh_peers, .. } = response { + for (peer_id, networks) in mesh_peers { + if networks.contains(&replica_network.to_string()) { + peers.push(peer_id); + } + } + } + } + peers + } + /// Send data to the network layer and recieve a unique `StreamId` to track the request. /// /// If the internal stream buffer is full, `None` will be returned. @@ -848,7 +925,7 @@ impl Core { /// Explicitly retrieve the reponse to a request sent to the network layer. /// This function is decoupled from the [`Core::send_to_network()`] method so as to prevent /// blocking until the response is returned. - pub async fn recv_from_network(&mut self, stream_id: StreamId) -> NetworkResult { + pub async fn recv_from_network(&mut self, stream_id: StreamId) -> NetworkResult { #[cfg(feature = "async-std-runtime")] { let channel = self.clone(); @@ -863,8 +940,8 @@ impl Core { return Ok(result); } - // Timeout after 5 trials - if loop_count < 5 { + // Timeout after 10 trials + if loop_count < 10 { loop_count += 1; } else { return Err(NetworkError::NetworkReadTimeout); @@ -878,7 +955,7 @@ impl Core { // Wait for the spawned task to complete match response_handler.await { Ok(result) => result, - Err(_) => Err(NetworkError::InternalTaskError), + Err(_) => Err(NetworkError::NetworkReadTimeout), } } @@ -896,8 +973,8 @@ impl Core { return Ok(result); } - // Timeout after 5 trials - if loop_count < 5 { + // Timeout after 10 trials + if loop_count < 10 { loop_count += 1; } else { return Err(NetworkError::NetworkReadTimeout); @@ -911,7 +988,7 @@ impl Core { // Wait for the spawned task to complete match response_handler.await { Ok(result) => result?, - Err(_) => Err(NetworkError::InternalTaskError), + Err(_) => 
Err(NetworkError::NetworkReadTimeout), } } } @@ -923,7 +1000,7 @@ impl Core { /// delay can be condoned. It will still timeout if the delay exceeds the configured period. /// /// If the internal buffer is full, it will return an error. - pub async fn query_network(&mut self, request: AppData) -> NetworkResult { + pub async fn query_network(&mut self, request: AppData) -> NetworkResult { // Send request if let Some(stream_id) = self.send_to_network(request).await { // Wait to recieve response from the network @@ -933,115 +1010,57 @@ impl Core { } } - /// Initiate the replication protocol - async fn init_replication(core: Core, network_info: NetworkInfo) { - // On setup, we added the replica nodes to our list of bootstrap nodes, so we can - // assume they have been dialed and a connection has been established - - // For setting up the replication protocol, we first send an RPC to the replica nodes - // and specify the first element of the vector as "REPL_CFG_@@". We then add the - // network key/name as the second argument. When the replica nodes recieve this, they then - // join the network using the key specified, so replication can take place. - - // Join private replication network - let repl_data = network_info.replication.state.clone(); + /// Setup the necessary preliminaries for the sharding protocol + async fn init_sharding(&mut self, network_id: String) { + // We will setup the indelying gossip group that all nodes in the nwtwork must be a part of, + // to keep the state of the network consistent + let gossip_request = AppData::GossipsubJoinNetwork(network_id.clone()); + let _ = self.query_network(gossip_request).await; + } - for replication_data in repl_data.iter() { - // Clone variables before moving them into a task - let repl_data = replication_data.clone(); - let mut core = core.clone(); + /// Update the state of the shard network. This is relevant when nodes join and leave the shard + /// network. 
+ async fn update_shard_state(&mut self, peer: PeerId, shard_id: ShardId, join: bool) { + // Update state + let mut shard_state = self.network_info.sharding.state.lock().await; + let shard_entry = shard_state + .entry(shard_id.clone()) + .or_insert(Default::default()); + + // If the node is joining + if join { + shard_entry.push(peer); + } else { + // Update shard state to reflect exit + shard_entry.retain(|entry| entry != &peer); + } + } - // Spawn an async-std task to join replica network and propel others to join aswell - #[cfg(feature = "tokio-runtime")] - tokio::task::spawn(async move { - // Join private replication network - let gossip_request = AppData::GossipsubJoinNetwork(repl_data.network_key.clone()); - if let Ok(_) = core.query_network(gossip_request).await { - for (peer_id_str, _) in repl_data.nodes.iter() { - // Parse the peer ID - match peer_id_str.parse::() { - Ok(peer_id) => { - // Prepare fetch request - let fetch_request = AppData::FetchData { - keys: vec![ - Core::REPL_CFG.to_owned().into(), - repl_data.network_key.clone().into(), - ], - peer: peer_id, - }; - - // Send the fetch request - if let Err(e) = core.query_network(fetch_request).await { - eprintln!( - "Failed to query network for peer {}: {:?}", - peer_id_str, e - ); - } - }, - Err(_) => { - eprintln!( - "Failed to parse peer ID specified as replica node: {}", - peer_id_str - ); - }, - } - } - } else { - eprintln!( - "Failed to send gossip request for key: {}", - repl_data.network_key - ); - } - }); + /// Publish the current shard state of the network to the new peer just joining. This will + /// enable the node to have a current view of the network. 
+ async fn publish_shard_state(&mut self, peer: PeerId) { + // Marshall the local state into a byte vector + let shard_state = self.network_info.sharding.state.lock().await.clone(); + let bytes = shard_image_to_bytes(shard_state); + + let message = vec![ + Core::SHARD_RPC_SYNC_FLAG.as_bytes().to_vec(), // Flag to indicate a sync request + bytes, // Network state + ]; + + // Send the RPC request. + let rpc_request = AppData::SendRpc { + keys: message, + peer, + }; - #[cfg(feature = "async-std-runtime")] - async_std::task::spawn(async move { - // Join private replication network - let gossip_request = AppData::GossipsubJoinNetwork(repl_data.network_key.clone()); - if let Ok(_) = core.query_network(gossip_request).await { - for (peer_id_str, _) in repl_data.nodes.iter() { - // Parse the peer ID - match peer_id_str.parse::() { - Ok(peer_id) => { - // Prepare fetch request - let fetch_request = AppData::FetchData { - keys: vec![ - Core::REPL_CFG.to_owned().into(), - repl_data.network_key.clone().into(), - ], - peer: peer_id, - }; - - // Send the fetch request - if let Err(e) = core.query_network(fetch_request).await { - eprintln!( - "Failed to query network for peer {}: {:?}", - peer_id_str, e - ); - } - }, - Err(_) => { - eprintln!( - "Failed to parse peer ID specified as replica node: {}", - peer_id_str - ); - }, - } - } - } else { - eprintln!( - "Failed to send gossip request for key: {}", - repl_data.network_key - ); - } - }); - } + let _ = self.query_network(rpc_request).await; } /// Handle incoming replicated data. - /// The first element of the incoming data vector contains the replica group key - async fn handle_incoming_repl_data(&mut self, repl_data: ReplBufferData) { - // First we generate an event announcing the arrival of a replica. + /// The first element of the incoming data vector contains the name of the replica network. 
+ async fn handle_incoming_repl_data(&mut self, repl_network: String, repl_data: ReplBufferData) { + // First, we generate an event announcing the arrival of some replicated data. // Application developers can listen for this let replica_data = repl_data.clone(); self.event_queue @@ -1050,68 +1069,174 @@ impl Core { outgoing_timestamp: replica_data.outgoing_timestamp, incoming_timestamp: replica_data.incoming_timestamp, message_id: replica_data.message_id, - sender: replica_data.sender, + source: replica_data.sender, }) - .await; - - // Then push into buffer queue - self.replica_buffer.push(repl_data).await; + .await; + + // Compare and increment the lamport's clock for the replica node + if let Some(repl_network_data) = self + .network_info + .replication + .state + .lock() + .await + .get_mut(&repl_network) + { + // Update clock + (*repl_network_data).lamport_clock = + cmp::max(repl_network_data.lamport_clock, repl_data.lamport_clock) + .saturating_add(1); + + // Then push into buffer queue + self.replica_buffer + .push(self.clone(), repl_network, repl_data) + .await; + } } - /// Consume data in replication buffer - pub async fn consume_repl_data(&mut self) -> Option { - self.replica_buffer.pop_front().await + /// Handle incoming shard data. We will not be doing any internal buffering as the data would be + /// exposed as an event. 
+ async fn handle_incoming_shard_data( + &mut self, + shard_id: String, + source: PeerId, + incoming_data: ByteVector, + ) { + // Push into event queue + self.event_queue + .push(NetworkEvent::IncomingForwardedData { + data: byte_vec_to_string_vec(incoming_data.clone()), + source, + }) + .await; + + // Notify other nodes in the shard + let _ = self.replicate(incoming_data, &shard_id).await; } - /// Send data to replica nodes - pub async fn replicate(&mut self, mut replica_data: ByteVector, repl_key: &str) { - // So that we do not block the foreground operations, we will spawn a task to handle it and - // then return immediately + /// Consume data in replication buffer. + pub async fn consume_repl_data(&mut self, replica_network: &str) -> Option { + self.replica_buffer + .pop_front(self.clone(), replica_network) + .await + } - // Prepare the replication message - let mut node = self.clone(); - let replication_key = repl_key.to_owned(); + /// Join a replica network and get up to speed with the current network data state. + /// + /// If the consistency model is eventual, the node's buffer will almost immediately be up to + /// date. But if the consistency model is strong, [`Core::replicate_buffer`] must be called to + /// update the buffer. 
+ pub async fn join_repl_network(&mut self, repl_network: String) -> NetworkResult<()> { + // Set up replica network config + let mut cfg = self.network_info.replication.state.lock().await; + cfg.entry(repl_network.clone()).or_insert(ReplConfigData { + lamport_clock: 0, + last_clock: 0, + nodes: Default::default(), + }); - #[cfg(feature = "tokio-runtime")] - tokio::task::spawn(async move { - // Add the replication flag as the first element of the message and the replication key - // as the second one - let mut message = vec![ - Core::REPL_GOSSIP.as_bytes().to_vec(), - replication_key.clone().into(), - ]; - message.append(&mut replica_data); - - // Prepare a gossip request - let gossip_request = AppData::GossipsubBroadcastMessage { - topic: replication_key.to_owned(), - message, - }; + // Initialize replica buffers + self.replica_buffer.init(repl_network.clone()).await; - // Gossip data to replica nodes - let _ = node.query_network(gossip_request).await; - }); + // Free `Core` + drop(cfg); - #[cfg(feature = "async-std-runtime")] - async_std::task::spawn(async move { - // Construct replica message - let mut message = vec![ - Core::REPL_GOSSIP.as_bytes().to_vec(), // Replica Gossip Flag - get_unix_timestamp().to_string().into(), // Timestamp - replication_key.clone().into(), - ]; - // Then append data - message.append(&mut replica_data); - - // Prepare a gossip request - let gossip_request = AppData::GossipsubBroadcastMessage { - topic: replication_key.to_owned(), - message, - }; + // Join the replication (gossip) network + let gossip_request = AppData::GossipsubJoinNetwork(repl_network.clone()); + let _ = self.query_network(gossip_request).await?; - // Gossip data to replica nodes - let _ = node.query_network(gossip_request).await; - }); + // Check if the consistency model is eventual + if let ConsistencyModel::Eventual = self.replica_buffer.consistency_model() { + // Spin up task to ensure data consistency across the network + let core = self.clone(); + let network 
= repl_network.clone(); + #[cfg(feature = "tokio-runtime")] + tokio::task::spawn(async move { + let buffer = core.replica_buffer.clone(); + buffer.sync_with_eventual_consistency(core, network).await; + }); + + #[cfg(feature = "async-std-runtime")] + async_std::task::spawn(async move { + let buffer = core.replica_buffer.clone(); + buffer.sync_with_eventual_consistency(core, network).await; + }); + } + + Ok(()) + } + + /// Leave a replica network. The messages on the internal replica queue are not discarded so as + /// to aid speedy recorvery in case of reconnection. + pub async fn leave_repl_network(&mut self, repl_network: String) -> NetworkResult { + // Leave the replication (gossip) network + let gossip_request = AppData::GossipsubExitNetwork(repl_network.clone()); + self.query_network(gossip_request).await + } + + /// Clone a replica node's current buffer image. This is necessary in case of + /// joining a replica network with a strong consistency model. + pub async fn replicate_buffer( + &self, + repl_network: String, + replica_node: PeerId, + ) -> Result<(), NetworkError> { + // First make sure i'm a part of the replica network + if self + .network_info + .replication + .state + .lock() + .await + .contains_key(&repl_network) + { + // Populate buffer + self.replica_buffer + .replicate_buffer(self.clone(), repl_network, replica_node) + .await + } else { + Err(NetworkError::MissingReplNetwork) + } + } + + /// Send data to replica nodes. Function returns false if node is not a member of the replica + /// network specified, meaning the replication network has not been configured or joined. 
+ pub async fn replicate( + &mut self, + mut replica_data: ByteVector, + replica_network: &str, + ) -> NetworkResult<()> { + // Extract the replica network data with minimal lock time + let replica_network_data = { + let mut state = self.network_info.replication.state.lock().await; + if let Some(data) = state.get_mut(replica_network) { + // Increase the clock atomically before releasing the lock + data.lamport_clock = data.lamport_clock.saturating_add(1); + data.clone() + } else { + return Err(NetworkError::MissingReplNetwork); + } + }; + + // Prepare the replication message + let mut message = vec![ + Core::REPL_GOSSIP_FLAG.as_bytes().to_vec(), // Replica Gossip Flag + get_unix_timestamp().to_string().into(), // Timestamp + replica_network_data.lamport_clock.to_string().into(), // Clock + replica_network.to_owned().into(), // Replica network + ]; + message.append(&mut replica_data); + + // Prepare a gossip request + let gossip_request = AppData::GossipsubBroadcastMessage { + topic: replica_network.to_owned(), + message, + }; + + // Gossip data to replica nodes + self.query_network(gossip_request).await?; + + Ok(()) } /// Handle the responses coming from the network layer. This is usually as a result of a request @@ -1135,7 +1260,7 @@ impl Core { res @ AppResponse::KademliaGetProviders{..} => buffer_guard.insert(stream_id, Ok(res)), res @ AppResponse::KademliaNoProvidersFound => buffer_guard.insert(stream_id, Ok(res)), res @ AppResponse::KademliaGetRoutingTableInfo { .. } => buffer_guard.insert(stream_id, Ok(res)), - res @ AppResponse::FetchData(..) => buffer_guard.insert(stream_id, Ok(res)), + res @ AppResponse::SendRpc(..) 
=> buffer_guard.insert(stream_id, Ok(res)), res @ AppResponse::GetNetworkInfo{..} => buffer_guard.insert(stream_id, Ok(res)), res @ AppResponse::GossipsubBroadcastSuccess => buffer_guard.insert(stream_id, Ok(res)), res @ AppResponse::GossipsubJoinSuccess => buffer_guard.insert(stream_id, Ok(res)), @@ -1158,7 +1283,6 @@ impl Core { /// Important information are sent to the application layer over a (mpsc) stream. async fn handle_async_operations( mut swarm: Swarm, - mut network_info: NetworkInfo, mut network_sender: Sender, mut receiver: Receiver, mut network_core: Core, @@ -1169,217 +1293,228 @@ impl Core { let data_queue_3 = DataQueue::new(); let data_queue_4 = DataQueue::new(); + // Network information + let mut network_info = network_core.network_info.clone(); + // Loop to handle incoming application streams indefinitely loop { select! { - // Handle incoming stream data - stream_data = receiver.next() => { - match stream_data { - Some(incoming_data) => { - match incoming_data { - StreamData::FromApplication(stream_id, app_data) => { - // Trackable stream id - let stream_id = stream_id; - match app_data { - // Put back into the stream what we read from it - AppData::Echo(message) => { - // Send the response back to the application layer - let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::Echo(message))).await; - }, - AppData::DailPeer(peer_id, multiaddr) => { - if let Ok(multiaddr) = multiaddr.parse::() { - // Add to routing table - swarm.behaviour_mut().kademlia.add_address(&peer_id, multiaddr.clone()); - if let Ok(_) = swarm.dial(multiaddr.clone().with(Protocol::P2p(peer_id))) { - // Send the response back to the application layer - let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::DailPeerSuccess(multiaddr.to_string()))).await; + // Handle incoming stream data + stream_data = receiver.next() => { + match stream_data { + Some(incoming_data) => { + match incoming_data { + 
StreamData::FromApplication(stream_id, app_data) => { + // Trackable stream id + let stream_id = stream_id; + match app_data { + // Put back into the stream what we read from it + AppData::Echo(message) => { + // Send the response back to the application layer + let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::Echo(message))).await; + }, + AppData::DailPeer(peer_id, multiaddr) => { + if let Ok(multiaddr) = multiaddr.parse::() { + // Add to routing table + swarm.behaviour_mut().kademlia.add_address(&peer_id, multiaddr.clone()); + if let Ok(_) = swarm.dial(multiaddr.clone().with(Protocol::P2p(peer_id))) { + // Send the response back to the application layer + let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::DailPeerSuccess(multiaddr.to_string()))).await; + } else { + // Return error + let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::Error(NetworkError::DailPeerError))).await; + } + } + }, + // Store a value in the DHT and (optionally) on explicit specific peers + AppData::KademliaStoreRecord { key, value, expiration_time, explicit_peers } => { + // Create a kad record + let mut record = Record::new(key.clone(), value); + + + // Set (optional) expiration time + record.expires = expiration_time; + + + // Insert into DHT + if let Ok(_) = swarm.behaviour_mut().kademlia.put_record(record.clone(), kad::Quorum::One) { + // The node automatically becomes a provider in the network + let _ = swarm.behaviour_mut().kademlia.start_providing(RecordKey::new(&key)); + + + // Send streamId to libp2p events, to track response + data_queue_1.push(stream_id).await; + + + // Cache record on peers explicitly (if specified) + if let Some(explicit_peers) = explicit_peers { + // Extract PeerIds + let peers = explicit_peers.iter().map(|peer_id_string| { + PeerId::from_bytes(&peer_id_string.from_base58().unwrap_or_default()) + }).filter_map(Result::ok).collect::>(); + 
swarm.behaviour_mut().kademlia.put_record_to(record, peers.into_iter(), kad::Quorum::One); + } } else { // Return error - let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::Error(NetworkError::DailPeerError))).await; + let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::Error(NetworkError::KadStoreRecordError(key)))).await; } - } - }, - // Store a value in the DHT and (optionally) on explicit specific peers - AppData::KademliaStoreRecord { key, value, expiration_time, explicit_peers } => { - // Create a kad record - let mut record = Record::new(key.clone(), value); - - // Set (optional) expiration time - record.expires = expiration_time; + }, + // Perform a lookup in the DHT + AppData::KademliaLookupRecord { key } => { + let _ = swarm.behaviour_mut().kademlia.get_record(key.clone().into()); - // Insert into DHT - if let Ok(_) = swarm.behaviour_mut().kademlia.put_record(record.clone(), kad::Quorum::One) { - // The node automatically becomes a provider in the network - let _ = swarm.behaviour_mut().kademlia.start_providing(RecordKey::new(&key)); // Send streamId to libp2p events, to track response - data_queue_1.push(stream_id).await; - - // Cache record on peers explicitly (if specified) - if let Some(explicit_peers) = explicit_peers { - // Extract PeerIds - let peers = explicit_peers.iter().map(|peer_id_string| { - PeerId::from_bytes(&peer_id_string.from_base58().unwrap_or_default()) - }).filter_map(Result::ok).collect::>(); - swarm.behaviour_mut().kademlia.put_record_to(record, peers.into_iter(), kad::Quorum::One); - } - } else { - // Return error - let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::Error(NetworkError::KadStoreRecordError(key)))).await; - } - }, - // Perform a lookup in the DHT - AppData::KademliaLookupRecord { key } => { - let _ = swarm.behaviour_mut().kademlia.get_record(key.clone().into()); + data_queue_2.push(stream_id).await; + }, + // Perform a lookup of peers 
that store a record + AppData::KademliaGetProviders { key } => { + swarm.behaviour_mut().kademlia.get_providers(key.clone().into()); - // Send streamId to libp2p events, to track response - data_queue_2.push(stream_id).await; - }, - // Perform a lookup of peers that store a record - AppData::KademliaGetProviders { key } => { - swarm.behaviour_mut().kademlia.get_providers(key.clone().into()); - // Send streamId to libp2p events, to track response - data_queue_3.push(stream_id).await; - } - // Stop providing a record on the network - AppData::KademliaStopProviding { key } => { - swarm.behaviour_mut().kademlia.stop_providing(&key.into()); - } - // Remove record from local store - AppData::KademliaDeleteRecord { key } => { - swarm.behaviour_mut().kademlia.remove_record(&key.into()); - } - // Return important routing table info. We could return kbuckets depending on needs, for now it's just the network ID. - AppData::KademliaGetRoutingTableInfo => { - // Send the response back to the application layer - let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::KademliaGetRoutingTableInfo{protocol_id: network_info.id.to_string()})).await; - }, - // Fetch data quickly from a peer over the network - AppData::FetchData { keys, peer } => { - // Construct the RPC object - let rpc = Rpc::ReqResponse { data: keys.clone() }; - - // Inform the swarm to make the request - let _ = swarm - .behaviour_mut() - .request_response - .send_request(&peer, rpc); - - // Send streamId to libp2p events, to track response - data_queue_4.push(stream_id).await; - }, - // Return important information about the node - AppData::GetNetworkInfo => { - // Connected peers - let connected_peers = swarm.connected_peers().map(|peer| peer.to_owned()).collect::>(); + // Send streamId to libp2p events, to track response + data_queue_3.push(stream_id).await; + } + // Stop providing a record on the network + AppData::KademliaStopProviding { key } => { + 
swarm.behaviour_mut().kademlia.stop_providing(&key.into()); + } + // Remove record from local store + AppData::KademliaDeleteRecord { key } => { + swarm.behaviour_mut().kademlia.remove_record(&key.into()); + } + // Return important routing table info. We could return kbuckets depending on needs, for now it's just the network ID. + AppData::KademliaGetRoutingTableInfo => { + // Send the response back to the application layer + let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::KademliaGetRoutingTableInfo{protocol_id: network_info.id.to_string()})).await; + }, + // Fetch data quickly from a peer over the network + AppData::SendRpc { keys, peer } => { + // Construct the RPC object + let rpc = Rpc::ReqResponse { data: keys.clone() }; - // External Addresses - let external_addresses = swarm.listeners().map(|multiaddr| multiaddr.to_string()).collect::>(); - // Send the response back to the application layer - let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::GetNetworkInfo { peer_id: swarm.local_peer_id().clone(), connected_peers, external_addresses })).await; - }, - // Send gossip message to peers - AppData::GossipsubBroadcastMessage { message, topic } => { - // Get the topic hash - let topic_hash = TopicHash::from_raw(topic); + // Inform the swarm to make the request + let _ = swarm + .behaviour_mut() + .request_response + .send_request(&peer, rpc); - // Marshall message into a single string - let message = message.join(GOSSIP_MESSAGE_SEPARATOR.as_bytes()); - // Check if we're already subscribed to the topic - let is_subscribed = swarm.behaviour().gossipsub.mesh_peers(&topic_hash).any(|peer| peer == swarm.local_peer_id()); + // Send streamId to libp2p events, to track response + data_queue_4.push(stream_id).await; + }, + // Return important information about the node + AppData::GetNetworkInfo => { + // Connected peers + let connected_peers = swarm.connected_peers().map(|peer| peer.to_owned()).collect::>(); - // 
Gossip - if swarm - .behaviour_mut().gossipsub - .publish(topic_hash, message).is_ok() && !is_subscribed { - // Send the response back to the application layer - let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::GossipsubBroadcastSuccess)).await; - } else { - // Return error - let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::Error(NetworkError::GossipsubBroadcastMessageError))).await; - } - }, - // Join a mesh network - AppData::GossipsubJoinNetwork(topic) => { - // Create a new topic - let topic = IdentTopic::new(topic); + // External Addresses + let external_addresses = swarm.listeners().map(|multiaddr| multiaddr.to_string()).collect::>(); - // Subscribe - if swarm.behaviour_mut().gossipsub.subscribe(&topic).is_ok() { // Send the response back to the application layer - let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::GossipsubJoinSuccess)).await; - } else { - // Return error - let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::Error(NetworkError::GossipsubJoinNetworkError))).await; - } - }, - // Get information concerning our gossiping - AppData::GossipsubGetInfo => { - // Topics we're subscribed to - let subscribed_topics = swarm.behaviour().gossipsub.topics().map(|topic| topic.clone().into_string()).collect::>(); + let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::GetNetworkInfo { peer_id: swarm.local_peer_id().clone(), connected_peers, external_addresses })).await; + }, + // Send gossip message to peers + AppData::GossipsubBroadcastMessage { message, topic } => { + // Get the topic hash + let topic_hash = TopicHash::from_raw(topic); + + // Marshall message into a single string + let message = message.join(Core::GOSSIP_MESSAGE_SEPARATOR.as_bytes()); + + // Check if we're already subscribed to the topic + let is_subscribed = swarm.behaviour().gossipsub.mesh_peers(&topic_hash).any(|peer| peer == swarm.local_peer_id()); + + 
// Gossip + if swarm + .behaviour_mut().gossipsub + .publish(topic_hash, message).is_ok() && !is_subscribed { + // Send the response back to the application layer + let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::GossipsubBroadcastSuccess)).await; + } else { + // Return error + let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::Error(NetworkError::GossipsubBroadcastMessageError))).await; + } + }, + // Join a mesh network + AppData::GossipsubJoinNetwork(topic) => { + // Create a new topic + let topic = IdentTopic::new(topic); - // Peers we know and the topics they are subscribed too - let mesh_peers = swarm.behaviour().gossipsub.all_peers().map(|(peer, topics)| { - (peer.to_owned(), topics.iter().map(|&t| t.clone().as_str().to_owned()).collect::>()) - }).collect::>(); + // Subscribe + if swarm.behaviour_mut().gossipsub.subscribe(&topic).is_ok() { + // Send the response back to the application layer + let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::GossipsubJoinSuccess)).await; + } else { + // Return error + let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::Error(NetworkError::GossipsubJoinNetworkError))).await; + } + }, + // Get information concerning our gossiping + AppData::GossipsubGetInfo => { + // Topics we're subscribed to + let subscribed_topics = swarm.behaviour().gossipsub.topics().map(|topic| topic.clone().into_string()).collect::>(); - // Retrieve blacklist - let blacklist = network_info.gossipsub.blacklist.into_inner(); + // Peers we know and the topics they are subscribed too + let mesh_peers = swarm.behaviour().gossipsub.all_peers().map(|(peer, topics)| { + (peer.to_owned(), topics.iter().map(|&t| t.clone().as_str().to_owned()).collect::>()) + }).collect::>(); - // Send the response back to the application layer - let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::GossipsubGetInfo { topics: subscribed_topics, 
mesh_peers, blacklist })).await; - }, - // Exit a network we're a part of - AppData::GossipsubExitNetwork(topic) => { - // Create a new topic - let topic = IdentTopic::new(topic); + // Retrieve blacklist + let blacklist = network_info.gossipsub.blacklist.into_inner(); - // Subscribe - if swarm.behaviour_mut().gossipsub.unsubscribe(&topic).is_ok() { // Send the response back to the application layer - let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::GossipsubExitSuccess)).await; - } else { - // Return error - let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::Error(NetworkError::GossipsubJoinNetworkError))).await; + let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::GossipsubGetInfo { topics: subscribed_topics, mesh_peers, blacklist })).await; + }, + // Exit a network we're a part of + AppData::GossipsubExitNetwork(topic) => { + // Create a new topic + let topic = IdentTopic::new(topic); + + // Subscribe + if swarm.behaviour_mut().gossipsub.unsubscribe(&topic).is_ok() { + // Send the response back to the application layer + let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::GossipsubExitSuccess)).await; + } else { + // Return error + let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::Error(NetworkError::GossipsubJoinNetworkError))).await; + } } - } - // Blacklist a peer explicitly - AppData::GossipsubBlacklistPeer(peer) => { - // Add to list - swarm.behaviour_mut().gossipsub.blacklist_peer(&peer); + // Blacklist a peer explicitly + AppData::GossipsubBlacklistPeer(peer) => { + // Add to list + swarm.behaviour_mut().gossipsub.blacklist_peer(&peer); - // Add peer to blacklist - network_info.gossipsub.blacklist.list.insert(peer); + // Add peer to blacklist + network_info.gossipsub.blacklist.list.insert(peer); - // Send the response back to the application layer - let _ = network_sender.send(StreamData::ToApplication(stream_id, 
AppResponse::GossipsubBlacklistSuccess)).await; - }, - // Remove a peer from the blacklist - AppData::GossipsubFilterBlacklist(peer) => { - // Add to list - swarm.behaviour_mut().gossipsub.remove_blacklisted_peer(&peer); + // Send the response back to the application layer + let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::GossipsubBlacklistSuccess)).await; + }, + // Remove a peer from the blacklist + AppData::GossipsubFilterBlacklist(peer) => { + // Add to list + swarm.behaviour_mut().gossipsub.remove_blacklisted_peer(&peer); - // Add peer to blacklist - network_info.gossipsub.blacklist.list.remove(&peer); + // Add peer to blacklist + network_info.gossipsub.blacklist.list.remove(&peer); - // Send the response back to the application layer - let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::GossipsubBlacklistSuccess)).await; - }, + // Send the response back to the application layer + let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::GossipsubBlacklistSuccess)).await; + }, + } } + _ => {} } - _ => {} - } - }, - _ => {} - } - }, - swarm_event = swarm.next() => { - match swarm_event { - Some(event) => { - match event { + }, + _ => {} + } + }, + swarm_event = swarm.next() => { + match swarm_event { + Some(event) => { + match event { SwarmEvent::NewListenAddr { listener_id, address, @@ -1648,29 +1783,72 @@ impl Core { }, // Request-response CoreEvent::RequestResponse(event) => match event { - request_response::Event::Message { peer: _, message } => match message { + request_response::Event::Message { peer, message } => match message { // A request just came in request_response::Message::Request { request_id: _, request, channel } => { // Parse request match request { Rpc::ReqResponse { data } => { - // Check if it a request to join a replication network - if &data[0][..] 
== Core::REPL_CFG.as_bytes() { - // Send the response, so as to return the RPC immediately - let _ = swarm.behaviour_mut().request_response.send_response(channel, Rpc::ReqResponse { data: data.clone() }); - - // Attempt to decode the key - if let Ok(key_string) = String::from_utf8(data[1].clone()) { - // Join the replication (gossip) network - let gossip_request = AppData::GossipsubJoinNetwork(key_string.clone()); - let _ = network_core.clone().query_network(gossip_request).await; + let byte_str = String::from_utf8_lossy(&data[0]); + match byte_str.as_ref() { + // It is a request to retrieve missing data the RPC sender node lacks + Core::RPC_SYNC_PULL_FLAG => { + // Get replica network that the requested data belong to + let repl_network = String::from_utf8(data[1].clone()).unwrap_or_default(); + + // Retrieve missing data from local data buffer + let requested_msgs = network_core.replica_buffer.pull_missing_data(repl_network, &data[2..]).await; + + // Send the response + let _ = swarm.behaviour_mut().request_response.send_response(channel, Rpc::ReqResponse { data: requested_msgs }); } - } else { - // Pass request data to configured request handler - let response_data = (network_info.rpc_handler_fn)(data); - // Send the response - let _ = swarm.behaviour_mut().request_response.send_response(channel, Rpc::ReqResponse { data: response_data }); - }; + // It is a request to join a shard network + Core::SHARD_RPC_SYNC_FLAG => { + // Parse the incoming shard state + let incoming_state = bytes_to_shard_image(data[1].clone()); + + // Merge the incoming state with local + let mut current_shard_state = network_core.network_info.sharding.state.lock().await; + merge_shard_states(&mut current_shard_state, incoming_state); + + // Send the response + let _ = swarm.behaviour_mut().request_response.send_response(channel, Rpc::ReqResponse { data: Default::default() }); + } + // It is an incoming shard message forwarded from peer not permitted to store the data + 
Core::RPC_DATA_FORWARDING_FLAG => { + // Send the response, so as to return the RPC immediately + let _ = swarm.behaviour_mut().request_response.send_response(channel, Rpc::ReqResponse { data: Default::default() }); + + // Handle incoming shard data + let shard_id = String::from_utf8_lossy(&data[1]).to_string(); + let mut core = network_core.clone(); + let incoming_data: ByteVector = data[2..].into(); + + #[cfg(feature = "tokio-runtime")] + tokio::task::spawn(async move { + let _ = core.handle_incoming_shard_data(shard_id, peer, incoming_data).await; + }); + + #[cfg(feature = "async-std-runtime")] + async_std::task::spawn(async move { + let _ = core.handle_incoming_shard_data(shard_id, peer, incoming_data).await; + }); + } + // It is an incmoing request to ask for data on this node because it is a member of a logical shard + Core::SHARD_RPC_REQUEST_FLAG => { + // Pass request data to configured shard request handler + let response_data = (network_info.sharding.config.callback)(data[1..].into()); + // Send the response + let _ = swarm.behaviour_mut().request_response.send_response(channel, Rpc::ReqResponse { data: response_data }); + } + // Normal RPC + _ => { + // Pass request data to configured request handler + let response_data = (network_info.rpc_handler_fn)(data); + // Send the response + let _ = swarm.behaviour_mut().request_response.send_response(channel, Rpc::ReqResponse { data: response_data }); + } + } } } }, @@ -1681,7 +1859,7 @@ impl Core { match response { Rpc::ReqResponse { data } => { // Send the response back to the application layer - let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::FetchData(data))).await; + let _ = network_sender.send(StreamData::ToApplication(stream_id, AppResponse::SendRpc(data))).await; }, } } @@ -1702,29 +1880,120 @@ impl Core { gossipsub::Event::Message { propagation_source, message_id, message } => { // Break data into its constituents. 
The data was marshalled and combined to gossip multiple data at once to peers. // Now we will break them up and pass for handling - let data_string = String::from_utf8(message.data).unwrap_or_default(); - let gossip_data = data_string.split(GOSSIP_MESSAGE_SEPARATOR).map(|msg| msg.to_string()).collect::>(); - - // Check whether it is a replication message - if gossip_data[0] == Core::REPL_GOSSIP { - // Construct buffer data - let queue_data = ReplBufferData { - data: gossip_data[2..].to_owned(), - outgoing_timestamp: get_unix_timestamp(), - incoming_timestamp: gossip_data[1].parse::().unwrap_or(0), - message_id: message_id.to_string(), - sender: propagation_source.clone(), - }; - - // Handle incoming replicated data - network_core.handle_incoming_repl_data(queue_data).await; - } else { - // First trigger the configured application filter event - if (network_info.gossip_filter_fn)(propagation_source.clone(), message_id, message.source, message.topic.to_string(), gossip_data.clone()) { - // Append to network event queue - network_core.event_queue.push(NetworkEvent::GossipsubIncomingMessageHandled { source: propagation_source, data: gossip_data }).await; + let data_string = String::from_utf8_lossy(&message.data).to_string(); + let gossip_data = data_string.split(Core::GOSSIP_MESSAGE_SEPARATOR).map(|msg| msg.to_string()).collect::>(); + match gossip_data[0].as_str() { + // It is an incoming replication message + Core::REPL_GOSSIP_FLAG => { + // Construct buffer data + let queue_data = ReplBufferData { + data: gossip_data[4..].to_owned(), + lamport_clock: gossip_data[2].parse::().unwrap_or(0), // It can never panic + outgoing_timestamp: gossip_data[1].parse::().unwrap_or(0), + incoming_timestamp: get_unix_timestamp(), + message_id: message_id.to_string(), + sender: if let Some(peer) = message.source { peer.clone() } else { propagation_source.clone() }, + confirmations: if network_core.replica_buffer.consistency_model() == ConsistencyModel::Eventual { + // No 
confirmations needed for eventual consistency + None + } else { + // Set count to 1 + Some(1) + } + }; + + // Handle incoming replicated data + let mut core = network_core.clone(); + let queue_data = queue_data.clone(); + let data = gossip_data[3].clone().into(); + + #[cfg(feature = "tokio-runtime")] + tokio::task::spawn(async move { + let _ = core.handle_incoming_repl_data(data, queue_data).await; + }); + + #[cfg(feature = "async-std-runtime")] + async_std::task::spawn(async move { + let _ = core.handle_incoming_repl_data(data, queue_data).await; + }); + }, + // It is a broadcast from replica nodes to ensure strong consistency + Core::STRONG_CONSISTENCY_FLAG => { + // Handle incoming replicated data + let core = network_core.clone(); + let data = gossip_data[2].clone().into(); + let network = gossip_data[1].to_owned(); + + #[cfg(feature = "tokio-runtime")] + tokio::task::spawn(async move { + let _ = core.replica_buffer.handle_data_confirmation(core.clone(), network, data).await; + }); + + #[cfg(feature = "async-std-runtime")] + async_std::task::spawn(async move { + let _ = core.replica_buffer.handle_data_confirmation(core.clone(), network, data).await; + }); + }, + // It is a broadcast from replica nodes to ensure eventual consistency + Core::EVENTUAL_CONSISTENCY_FLAG => { + // Lower bound of Lamport's Clock + let min_clock = gossip_data[3].parse::().unwrap_or_default(); + // Higher bound of Lamport's Clock + let max_clock = gossip_data[4].parse::().unwrap_or_default(); + + // Synchronize the incoming replica node's buffer image with the local buffer image + let core = network_core.clone(); + let repl_peer_id = gossip_data[1].clone(); + let repl_network = gossip_data[2].clone(); + let replica_data_state = gossip_data[5..].to_owned(); + + #[cfg(feature = "tokio-runtime")] + tokio::task::spawn(async move { + core.replica_buffer.sync_buffer_image(core.clone(), repl_peer_id, repl_network, (min_clock, max_clock), replica_data_state).await; + }); + + #[cfg(feature = 
"async-std-runtime")] + async_std::task::spawn(async move { + core.replica_buffer.sync_buffer_image(core.clone(), repl_peer_id, repl_network, (min_clock, max_clock), replica_data_state).await; + }); + } + // It is a broadcast to inform us about the addition of a new node to a shard network + Core::SHARD_GOSSIP_JOIN_FLAG => { + // Update sharding network state of remote node + if let Ok(peer_id) = gossip_data[1].parse::() { + // Send an RPC to the joining node to update its sharding state of the network + let mut core = network_core.clone(); + + #[cfg(feature = "tokio-runtime")] + tokio::task::spawn(async move { + core.publish_shard_state(peer_id).await; + }); + + #[cfg(feature = "async-std-runtime")] + async_std::task::spawn(async move { + core.publish_shard_state(peer_id).await; + }); + + // Update local state + let _ = network_core.update_shard_state(peer_id, gossip_data[2].clone(), true /* join */).await; + } + } + // It is a broadcast to inform us about the exit of a node from a shard network + Core::SHARD_GOSSIP_EXIT_FLAG => { + // Upload sharding network state + if let Ok(peer_id) = gossip_data[1].parse::() { + let _ = network_core.update_shard_state(peer_id, gossip_data[2].clone(), false /* exit */).await; + } + } + // Normal gossip + _ => { + // First trigger the configured application filter event + if (network_info.gossip_filter_fn)(propagation_source.clone(), message_id, message.source, message.topic.to_string(), gossip_data.clone()) { + // Append to network event queue + network_core.event_queue.push(NetworkEvent::GossipsubIncomingMessageHandled { source: propagation_source, data: gossip_data }).await; + } + // else { // drop message } } - // else { // drop message } } }, // A peer just subscribed diff --git a/swarm_nl/src/core/prelude.rs b/swarm_nl/src/core/prelude.rs index 6918f28d2..01b5bf8f6 100644 --- a/swarm_nl/src/core/prelude.rs +++ b/swarm_nl/src/core/prelude.rs @@ -1,6 +1,8 @@ // Copyright 2024 Algorealm, Inc. // Apache 2.0 License +//! 
The module that contains important data structures and logic for the functioning of swarmNl. + use self::ping_config::PingInfo; use libp2p::gossipsub::MessageId; use libp2p_identity::PublicKey; @@ -30,25 +32,28 @@ pub type AppResponseResult = Result; /// Type that represents the data exchanged during RPC operations. pub type RpcData = ByteVector; -/// Type that represents a vector of vector of bytes +/// Type that represents a vector of vector of bytes. pub type ByteVector = Vec>; +/// Type that represents the id of a shard +pub type ShardId = String; + +/// Type that represents the result for network operations +pub type NetworkResult = Result; + /// Type that represents a vector of string pub type StringVector = Vec; -/// Type that represents a nonce +/// Type that represents a nonce. pub type Nonce = u64; -/// The delimeter that separates the messages to gossip -pub(super) const GOSSIP_MESSAGE_SEPARATOR: &str = "~#~"; - /// Time to wait (in seconds) for the node (network layer) to boot. pub(super) const BOOT_WAIT_TIME: Seconds = 1; /// The buffer capacity of an mpsc stream. pub(super) const STREAM_BUFFER_CAPACITY: usize = 100; -/// Data exchanged over a stream between the application and network layer +/// Data exchanged over a stream between the application and network layer. #[derive(Debug, Clone)] pub(super) enum StreamData { /// Application data sent over the stream. @@ -84,7 +89,7 @@ pub enum AppData { /// Return important information about the local routing table. KademliaGetRoutingTableInfo, /// Fetch data(s) quickly from a peer over the network. - FetchData { keys: RpcData, peer: PeerId }, + SendRpc { keys: RpcData, peer: PeerId }, /// Get network information about the node. GetNetworkInfo, /// Send message to gossip peers in a mesh network. @@ -126,7 +131,7 @@ pub enum AppResponse { /// Routing table information. KademliaGetRoutingTableInfo { protocol_id: String }, /// Result of RPC operation. 
- FetchData(RpcData), + SendRpc(RpcData), /// A network error occured while executing the request. Error(NetworkError), /// Important information about the node. @@ -175,6 +180,24 @@ pub enum NetworkError { GossipsubBroadcastMessageError, #[error("failed to join a mesh network")] GossipsubJoinNetworkError, + #[error("failed to exit a mesh network")] + GossipsubExitNetworkError, + #[error("internal stream failed to transport data")] + InternalStreamError, + #[error("replica network not found")] + MissingReplNetwork, + #[error("network id for sharding has not been configured. See `CoreBuilder::with_shard()`")] + MissingShardingNetworkIdError, + #[error("threshold for data forwarding not met")] + DataForwardingError, + #[error("failed to shard data")] + ShardingFailureError, + #[error("failed to fetch sharded data")] + ShardingFetchError, + #[error("shard not found for input key")] + ShardNotFound, + #[error("no nodes found in logical shard")] + MissingShardNodesError, } /// A simple struct used to track requests sent from the application layer to the network layer. @@ -194,9 +217,6 @@ impl StreamId { } } -/// Type that contains the result of querying the network layer. -pub type NetworkResult = Result; - /// Type that keeps track of the requests from the application layer. /// This type has a maximum buffer size and will drop subsequent requests when full. /// It is unlikely to be ever full as the default is usize::MAX except otherwise specified during @@ -482,8 +502,11 @@ pub enum NetworkEvent { /// /// # Fields /// - /// - `source`: The `PeerId` of the source peer. /// - `data`: The data contained in the gossip message. + /// - `outgoing_timestamp`: The time the message left the source + /// - `outgoing_timestamp`: The time the message was recieved + /// - `message_id`: The unique id of the message + /// - `source`: The `PeerId` of the source peer. 
ReplicaDataIncoming { /// Data data: StringVector, @@ -491,10 +514,21 @@ pub enum NetworkEvent { outgoing_timestamp: Seconds, /// Timestamp at which the message arrived incoming_timestamp: Seconds, - /// Message Id to prevent deduplication. It is usually a hash of the incoming message + /// Message ID to prevent deduplication. It is usually a hash of the incoming message message_id: String, /// Sender PeerId - sender: PeerId, + source: PeerId, + }, + /// Event that announces the arrival of a forwarded sharded data + /// + /// # Fields + /// + /// - `data`: The data contained in the gossip message. + IncomingForwardedData { + /// Data + data: StringVector, + /// Sender's PeerId + source: PeerId, }, /// Event that announces the arrival of a gossip message. /// @@ -525,13 +559,13 @@ pub enum NetworkEvent { /// The struct that contains incoming information about a peer returned by the `Identify` protocol. #[derive(Debug, Clone, Eq, PartialEq, Hash)] pub struct IdentifyInfo { - /// The public key of the remote peer + /// The public key of the remote peer. pub public_key: PublicKey, - /// The address the remote peer is listening on + /// The address the remote peer is listening on. pub listen_addrs: Vec, - /// The protocols supported by the remote peer + /// The protocols supported by the remote peer. pub protocols: Vec, - /// The address we are listened on, observed by the remote peer + /// The address we are listened on, observed by the remote peer. pub observed_addr: Multiaddr, } @@ -545,12 +579,14 @@ pub(super) struct NetworkInfo { pub ping: PingInfo, /// Important information to manage `Gossipsub` operations. pub gossipsub: gossipsub_cfg::GossipsubInfo, - /// The function that handles incoming RPC data request and produces a response + /// The function that handles incoming RPC data request and produces a response. pub rpc_handler_fn: fn(RpcData) -> RpcData, - /// The function to filter incoming gossip messages + /// The function to filter incoming gossip messages. 
pub gossip_filter_fn: fn(PeerId, MessageId, Option, String, StringVector) -> bool, /// Important information to manage `Replication` operations. - pub replication: replica_cfg::ReplInfo, + pub replication: replication::ReplInfo, + /// Important information to manage `sharding` operations. + pub sharding: sharding::ShardingInfo, } /// Module that contains important data structures to manage `Ping` operations on the network. @@ -600,218 +636,6 @@ pub mod ping_config { } } -/// Module that contains important data structures to manage `Replication` operations on the network -pub mod replica_cfg { - use super::*; - use crate::ReplConfigData; - use std::{cmp::Ordering, sync::Arc, time::SystemTime}; - - /// Struct containing important information for replication - #[derive(Clone)] - pub struct ReplInfo { - /// Internal state for replication - pub state: Arc>, - } - - /// Struct containing configurations for replication - #[derive(Clone)] - pub enum ReplNetworkConfig { - /// No expiry, queue operates in a FIFO manner and message are dropped - /// when the buffer is full - NoExpiry, - /// A custom configuration. 
- /// - /// # Fields - /// - /// - `queue_length`: Max capacity for transient storage - /// - `expiry_time`: Expiry time of data in the buffer if the buffer is full - /// - `sync_epoch`: Epoch to attempt network synchronization of data in the buffer - Custom { - queue_length: u64, - expiry_time: Seconds, - sync_epoch: Seconds, - }, - /// A default Configuration (queue_length = 100, expiry_time = 60 seconds, - /// sychronization_epoch = 5 seconds) - Default, - } - - /// Important data to marshall from incoming relication payload and store in the transient - /// buffer - #[derive(Clone, Debug)] - pub struct ReplBufferData { - /// Raw incoming data - pub data: StringVector, - /// Timestamp at which the message left the sending node - pub outgoing_timestamp: Seconds, - /// Timestamp at which the message arrived - pub incoming_timestamp: Seconds, - /// Message Id to prevent deduplication. It is usually a hash of the incoming message - pub message_id: String, - /// Sender PeerId - pub sender: PeerId, - } - - /// Implement Ord - impl Ord for ReplBufferData { - fn cmp(&self, other: &Self) -> Ordering { - self.outgoing_timestamp - .cmp(&other.outgoing_timestamp) // Compare by outgoing_timestamp first - .then_with(|| self.message_id.cmp(&other.message_id)) // Then compare by message_id - } - } - - /// Implement PartialOrd - impl PartialOrd for ReplBufferData { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } - } - - /// Implement Eq - impl Eq for ReplBufferData {} - - /// Implement PartialEq - impl PartialEq for ReplBufferData { - fn eq(&self, other: &Self) -> bool { - self.outgoing_timestamp == other.outgoing_timestamp - && self.message_id == other.message_id - } - } - - /// Transient buffer queue where incoming replicated data are stored - pub struct ReplicaBufferQueue { - /// Configuration - config: ReplNetworkConfig, - /// Internal queue implementation - queue: Mutex>, - } - - impl ReplicaBufferQueue { - /// The max capacity of the buffer - 
const MAX_CAPACITY: u64 = 150; - - /// Expiry time of data in the buffer if the buffer is full - const EXPIRY_TIME: Seconds = 60; - - /// Epoch to attempt network synchronization of data in the buffer - const SYNC_EPOCH: Seconds = 5; - - /// Create a new instance of [ReplicaBufferQueue] - pub fn new(config: ReplNetworkConfig) -> Self { - Self { - queue: Mutex::new(BTreeSet::new()), - config, - } - } - - /// Push a new [ReplBufferData] item into the buffer - pub async fn push(&self, data: ReplBufferData) { - let mut queue = self.queue.lock().await; // Lock the queue to modify it - - // The behaviour of the push operation and its corresponding actions e.g removing an - // item from the queue is based on configuration - - match self.config { - // Default implementation supports expiry of buffer items and values are based on - // structs contants - ReplNetworkConfig::Default => { - // If the queue is full, remove expired data first - while queue.len() as u64 >= Self::MAX_CAPACITY { - // Check and remove expired data - let current_time = SystemTime::now() - .duration_since(SystemTime::UNIX_EPOCH) - .unwrap() - .as_secs(); - let mut expired_items = Vec::new(); - - // Identify expired items and collect them for removal - for entry in queue.iter() { - if current_time - entry.outgoing_timestamp >= Self::EXPIRY_TIME { - expired_items.push(entry.clone()); - } - } - - // Remove expired items - for expired in expired_items { - queue.remove(&expired); - } - - // If no expired items were removed, pop the front (oldest) item - if queue.len() as u64 >= Self::MAX_CAPACITY { - if let Some(first) = queue.iter().next().cloned() { - queue.remove(&first); - } - } - } - }, - // There is no expiry time for buffer items and removal is only done when buffer us full - ReplNetworkConfig::NoExpiry => { - while queue.len() as u64 >= Self::MAX_CAPACITY { - // Pop the front (oldest) item - if queue.len() as u64 >= Self::MAX_CAPACITY { - if let Some(first) = queue.iter().next().cloned() { - 
queue.remove(&first); - } - } - } - }, - // Here decay applies in addition to removal of excess buffer content - ReplNetworkConfig::Custom { - queue_length, - expiry_time, - .. - } => { - // If the queue is full, remove expired data first - while queue.len() as u64 >= queue_length { - // Check and remove expired data - let current_time = SystemTime::now() - .duration_since(SystemTime::UNIX_EPOCH) - .unwrap() - .as_secs(); - let mut expired_items = Vec::new(); - - // Identify expired items and collect them for removal - for entry in queue.iter() { - if current_time - entry.outgoing_timestamp >= expiry_time { - expired_items.push(entry.clone()); - } - } - - // Remove expired items - for expired in expired_items { - queue.remove(&expired); - } - - // If no expired items were removed, pop the front (oldest) item - if queue.len() as u64 >= Self::MAX_CAPACITY { - if let Some(first) = queue.iter().next().cloned() { - queue.remove(&first); - } - } - } - }, - } - - // Finally, insert the new data into the queue - queue.insert(data); - } - - // Pop the front (smallest element) from the queue - pub async fn pop_front(&self) -> Option { - let mut queue = self.queue.lock().await; - if let Some(first) = queue.iter().next().cloned() { - // Remove the front element (smallest) - queue.remove(&first); - Some(first) - } else { - // Empty queue - None - } - } - } -} - /// Module containing important state relating to the `Gossipsub` protocol. pub mod gossipsub_cfg { use super::*; @@ -883,7 +707,7 @@ where /// Append an item to the queue. pub async fn push(&self, item: T) { let mut buffer = self.buffer.lock().await; - if buffer.len() > MAX_QUEUE_ELEMENTS - 1 { + if buffer.len() >= MAX_QUEUE_ELEMENTS { buffer.pop_front(); } buffer.push_back(item); diff --git a/swarm_nl/src/core/replication.rs b/swarm_nl/src/core/replication.rs new file mode 100644 index 000000000..0ae72867d --- /dev/null +++ b/swarm_nl/src/core/replication.rs @@ -0,0 +1,1040 @@ +// Copyright 2024 Algorealm, Inc. 
+// Apache 2.0 License + +//! Module that contains important data structures to manage `Replication` operations on the +//! network. + +use super::*; +use std::{cmp::Ordering, collections::BTreeMap, sync::Arc, time::SystemTime}; + +/// Struct respresenting data for configuring node replication. +#[derive(Clone, Default, Debug)] +pub struct ReplConfigData { + /// Lamport's clock for synchronization. + pub lamport_clock: Nonce, + /// Clock of last data consumed from the replica buffer + pub last_clock: Nonce, + /// Replica nodes described by their addresses. + pub nodes: HashMap, +} + +/// Struct containing important information for replication. +#[derive(Clone)] +pub struct ReplInfo { + /// Internal state for replication. + pub state: Arc>>, +} + +/// The consistency models supported. +/// +/// This is important as is determines the behaviour of the node in handling and delivering +/// replicated data to the application layer. There are also trade-offs to be considered +/// before choosing any model. You must choose the model that aligns and suits your exact +/// usecase and objective. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ConsistencyModel { + /// Eventual consistency + Eventual, + /// Strong consistency + Strong(ConsensusModel), +} + +/// This enum dictates how many nodes need to come to an agreement for consensus to be held +/// during the impl of a strong consistency sync model. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ConsensusModel { + /// All nodes in the network must contribute to consensus + All, + /// Just a subset of the network are needed for consensus + MinPeers(u64), +} + +/// Enum containing configurations for replication. +#[derive(Clone, Debug)] +pub enum ReplNetworkConfig { + /// A custom configuration. + Custom { + /// Max capacity for transient storage. + queue_length: u64, + /// Expiry time of data in the buffer if the buffer is full. Set to `None` for no expiry. 
+ expiry_time: Option, + /// Epoch to wait before attempting the next network synchronization of data in the buffer. + sync_wait_time: Seconds, + /// The data consistency model to be supported by the node. This must be uniform across all + /// nodes to prevent undefined behaviour. + consistency_model: ConsistencyModel, + /// When data has arrived and is saved into the buffer, the time to wait for it to get to + /// other peers after which it can be picked for synchronization. + data_aging_period: Seconds, + }, + /// A default configuration: `queue_length` = 100, `expiry_time` = 60 seconds, + /// `sync_wait_time` = 5 seconds, `consistency_model`: `Eventual`, `data_wait_period` = 5 + /// seconds. + Default, +} + +/// Important data to marshall from incoming relication payload and store in the transient +/// buffer. +#[derive(Clone, Debug)] +pub struct ReplBufferData { + /// Raw incoming data. + pub data: StringVector, + /// Lamports clock for synchronization. + pub lamport_clock: Nonce, + /// Timestamp at which the message left the sending node. + pub outgoing_timestamp: Seconds, + /// Timestamp at which the message arrived. + pub incoming_timestamp: Seconds, + /// Message ID to prevent deduplication. It is usually a hash of the incoming message. + pub message_id: String, + /// Sender PeerId. + pub sender: PeerId, + /// Number of confirmations. This is to help the nodes using the strong consistency + /// synchronization data model to come to an agreement + pub confirmations: Option, +} + +/// Implement Ord. +impl Ord for ReplBufferData { + fn cmp(&self, other: &Self) -> Ordering { + self.lamport_clock + .cmp(&other.lamport_clock) // Compare by lamport_clock first + .then_with(|| self.message_id.cmp(&other.message_id)) // Then compare by message_id + } +} + +/// Implement PartialOrd. +impl PartialOrd for ReplBufferData { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +/// Implement Eq. 
+impl Eq for ReplBufferData {} + +/// Implement PartialEq. +impl PartialEq for ReplBufferData { + fn eq(&self, other: &Self) -> bool { + self.lamport_clock == other.lamport_clock && self.message_id == other.message_id + } +} + +/// Transient buffer queue where incoming replicated data are stored temporarily. +pub(crate) struct ReplicaBufferQueue { + /// Configuration for replication and general synchronization. + config: ReplNetworkConfig, + /// In the case of a strong consistency model, this is where data is buffered + /// initially before it is agreed upon by the majority of the network. After which + /// it is then moved to the queue exposed to the application layer. + temporary_queue: Mutex>>, + /// Internal buffer containing replicated data to be consumed by the application layer. + queue: Mutex>>, +} + +impl ReplicaBufferQueue { + /// The default max capacity of the buffer. + const MAX_CAPACITY: u64 = 150; + + /// The default expiry time of data in the buffer, when the buffer becomes full. + const EXPIRY_TIME: Seconds = 60; + + /// The default epoch to wait before attempting the next network synchronization. + const SYNC_WAIT_TIME: Seconds = 5; + + /// The default aging period after which the data can be synchronized across the network. + const DATA_AGING_PERIOD: Seconds = 5; + + /// Create a new instance of [ReplicaBufferQueue]. + pub fn new(config: ReplNetworkConfig) -> Self { + Self { + config, + temporary_queue: Mutex::new(Default::default()), + queue: Mutex::new(Default::default()), + } + } + + /// Return the configured [`ConsistencyModel`] for data synchronization. + pub fn consistency_model(&self) -> ConsistencyModel { + match self.config { + // Default config always supports eventual consistency + ReplNetworkConfig::Default => ConsistencyModel::Eventual, + ReplNetworkConfig::Custom { + consistency_model, .. + } => consistency_model, + } + } + + /// Initialize a replica networks data buffer. This occurs immediately after joining a new + /// network. 
+ pub async fn init(&self, repl_network: String) { + // Initialize primary public buffer + let mut queue = self.queue.lock().await; + queue.insert(repl_network.clone(), Default::default()); + + // Initialize transient buffer, in case of a string consistency model + let mut queue = self.temporary_queue.lock().await; + queue.insert(repl_network.clone(), Default::default()); + } + + /// Push a new [ReplBufferData] item into the buffer. + pub async fn push(&self, mut core: Core, replica_network: String, data: ReplBufferData) { + // Different behaviours based on configurations + match self.config { + // Default implementation supports expiry of buffer items + ReplNetworkConfig::Default => { + // Lock the queue to modify it + let mut queue = self.queue.lock().await; + + // Filter into replica network the data belongs to. + // If it doesn't exist, create new + let queue = queue.entry(replica_network).or_default(); + + // If the queue is full, remove expired data first + while queue.len() as u64 >= Self::MAX_CAPACITY { + // Check and remove expired data + let current_time = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_secs(); + let mut expired_items = Vec::new(); + + // Identify expired items and collect them for removal + for entry in queue.iter() { + if current_time.saturating_sub(entry.outgoing_timestamp) + >= Self::EXPIRY_TIME + { + expired_items.push(entry.clone()); + } + } + + // Remove expired items + for expired in expired_items { + queue.remove(&expired); + } + + // If no expired items were removed, pop the front (oldest) item + if queue.len() as u64 >= Self::MAX_CAPACITY { + if let Some(first) = queue.iter().next().cloned() { + queue.remove(&first); + } + } + } + + // Insert data right into the final queue + queue.insert(data); + }, + // Here decay applies in addition to removal of excess buffer content + ReplNetworkConfig::Custom { + queue_length, + expiry_time, + consistency_model, + .. 
+ } => { + // Which buffer the incoming data will interact with initially is determined by + // the supported data consistency model + match consistency_model { + // For eventual consistency, data is written straight into the final queue + // for consumption + ConsistencyModel::Eventual => { + // Lock the queue to modify it + let mut queue = self.queue.lock().await; + + // Filter into replica network the data belongs to. + // If it doesn't exist, create new + let queue = queue.entry(replica_network).or_default(); + + // If the queue is full, remove expired data first + while queue.len() as u64 >= queue_length { + // Remove only when data expiration is supported + if let Some(expiry_time) = expiry_time { + // Check and remove expired data + let current_time = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_secs(); + let mut expired_items = Vec::new(); + + // Identify expired items and collect them for removal + for entry in queue.iter() { + if current_time.saturating_sub(entry.outgoing_timestamp) + >= expiry_time + { + expired_items.push(entry.clone()); + } + } + + // Remove expired items + for expired in expired_items { + queue.remove(&expired); + } + } + + // If no expired items were removed, pop the front (oldest) item + if queue.len() as u64 >= queue_length { + if let Some(first) = queue.iter().next().cloned() { + queue.remove(&first); + } + } + } + + // Insert data right into the final queue + queue.insert(data); + }, + // Here data is written into the temporary buffer first, for finalization to + // occur. It is then moved into the final queue after favourable consensus + // has been reached. + ConsistencyModel::Strong(consensus_model) => { + // Lock the queue to modify it + let mut temp_queue = self.temporary_queue.lock().await; + + // Filter into replica network the data belongs to. 
+ // If it doesn't exist, create new + let temp_queue = temp_queue.entry(replica_network.clone()).or_default(); + + // Remove the first item from the queue. No decay applies here + if temp_queue.len() as u64 >= Self::MAX_CAPACITY { + if let Some(first_key) = temp_queue.keys().next().cloned() { + temp_queue.remove(&first_key); + } + } + + // Check whether we are 1 of 2 members of the replica network. + // Send a request to swarm + let replica_peers = core.replica_peers(&replica_network).await.len(); + + // Put into the primary public buffer directly if we are only two in the + // network or the consensus model required only one node confirmations + if replica_peers == 1 || consensus_model == ConsensusModel::MinPeers(1) { + let mut queue = self.queue.lock().await; + let entry = queue.entry(replica_network.clone()).or_default(); + + // Insert into queue + entry.insert(data); + } else { + // Get message ID + let message_id = data.message_id.clone(); + + // Insert data into queue. Confirmation count is already 1 + temp_queue.insert(data.message_id.clone(), data); + + // Start strong consistency synchronization algorithm: + // Broadcast just recieved message to peers to increase the + // confirmation. 
It is just the message ID that will be broadcast + let message = vec![ + Core::STRONG_CONSISTENCY_FLAG.as_bytes().to_vec(), /* Strong Consistency Sync Gossip Flag */ + replica_network.clone().into(), /* Replica network */ + message_id.as_bytes().into(), /* Message id */ + ]; + + // Prepare a gossip request + let gossip_request = AppData::GossipsubBroadcastMessage { + topic: replica_network.into(), + message, + }; + + // Gossip data to replica nodes + let _ = core.query_network(gossip_request).await; + } + }, + } + }, + } + } + + // Pop the front (earliest data) from the queue + pub async fn pop_front(&self, core: Core, replica_network: &str) -> Option { + // Lock the queue and extract the replica network's queue + let first_data = { + let mut queue = self.queue.lock().await; + + if let Some(queue) = queue.get_mut(replica_network) { + if let Some(first) = queue.iter().next().cloned() { + // Remove the front element from the queue + queue.remove(&first); + Some(first) + } else { + None + } + } else { + None + } + }; + + // If no data to process, return early + let first = first_data?; + + // Lock replication state to update lamport clock + { + let mut cfg = core.network_info.replication.state.lock().await; + + let entry = cfg + .entry(replica_network.to_owned()) + .or_insert_with(|| ReplConfigData { + lamport_clock: 0, + last_clock: first.lamport_clock, + nodes: Default::default(), + }); + + // Update the clock + entry.last_clock = first.lamport_clock; + } + + Some(first) + } + + pub async fn handle_data_confirmation( + &self, + mut core: Core, + replica_network: String, + message_id: String, + ) { + // Determine the number of peers required for consensus + let peers_count = match self.config { + ReplNetworkConfig::Custom { + consistency_model, .. 
+ } => match consistency_model { + ConsistencyModel::Eventual => 0, + ConsistencyModel::Strong(consensus_model) => match consensus_model { + ConsensusModel::All => { + // Get total number of replica peers + core.replica_peers(&replica_network).await.len() as u64 + }, + ConsensusModel::MinPeers(required_peers) => required_peers, + }, + }, + ReplNetworkConfig::Default => 0, + }; + + // Update confirmation count while holding the lock minimally + let is_fully_confirmed = { + let mut flag = false; + let mut temporary_queue = self.temporary_queue.lock().await; + if let Some(temp_queue) = temporary_queue.get_mut(&replica_network) { + if let Some(data_entry) = temp_queue.get_mut(&message_id) { + if data_entry.confirmations.unwrap() < peers_count { + // Increment confirmation count + data_entry.confirmations = Some(data_entry.confirmations.unwrap_or(1) + 1); + } + // Check if confirmations meet required peers + flag = peers_count != 0 && data_entry.confirmations == Some(peers_count); + } + } + + flag + }; + + // If fully confirmed, move data to the public queue + if is_fully_confirmed { + let mut public_queue = self.queue.lock().await; + let public_queue = public_queue + .entry(replica_network.clone()) + .or_insert_with(BTreeSet::new); + + // Cleanup expired or excessive entries + if let ReplNetworkConfig::Custom { + queue_length, + expiry_time, + .. 
+ } = self.config + { + // Remove oldest items if queue exceeds capacity, expired first + if public_queue.len() as u64 >= queue_length { + let current_time = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_secs(); + + // Remove expired items + if let Some(expiry_time) = expiry_time { + public_queue.retain(|entry| { + current_time.saturating_sub(entry.outgoing_timestamp) < expiry_time + }); + } + } + + // If no expired content, or expiry is disabled, then remove first queue element + while public_queue.len() as u64 >= queue_length { + if let Some(first) = public_queue.iter().next().cloned() { + public_queue.remove(&first); + } + } + } + + // Move confirmed entry to public queue + let mut temporary_queue = self.temporary_queue.lock().await; + if let Some(temp_queue) = temporary_queue.get_mut(&replica_network) { + if let Some(data_entry) = temp_queue.remove(&message_id) { + public_queue.insert(data_entry); + } + } + } + } + + /// Synchronize the data in the buffer queue using eventual consistency. + pub async fn sync_with_eventual_consistency(&self, core: Core, repl_network: String) { + loop { + let repl_network = repl_network.clone(); + let mut core = core.clone(); + + // Get configured aging period + let data_aging_time = match self.config { + ReplNetworkConfig::Default => Self::DATA_AGING_PERIOD, + ReplNetworkConfig::Custom { + data_aging_period, .. 
+ } => data_aging_period, + }; + + // Fetch local data state while holding the lock minimally + let local_data_state = { + let queue = self.queue.lock().await; + queue.get(&repl_network).cloned() + }; + + if let Some(local_data_state) = local_data_state { + // Filter data outside the lock + let local_data = local_data_state + .iter() + .filter(|&d| { + util::get_unix_timestamp().saturating_sub(d.incoming_timestamp) + > data_aging_time + }) + .cloned() + .collect::>(); + + // Extract the bounding Lamport clocks + let (min_clock, max_clock) = + if let (Some(first), Some(last)) = (local_data.first(), local_data.last()) { + (first.lamport_clock, last.lamport_clock) + } else { + // Default values if no data is present + (0, 0) + }; + + // Extract message IDs for synchronization + let mut message_ids = local_data + .iter() + .map(|data| { + // Make the ID a concatenation of the message Id and it's original pubishing + // peer + let id = data.message_id.clone() + + Core::ENTRY_DELIMITER + + &data.sender.to_string(); + id.into() + }) + .collect::(); + + // Prepare gossip message + let mut message = vec![ + // Strong Consistency Sync Gossip Flag + Core::EVENTUAL_CONSISTENCY_FLAG.as_bytes().to_vec(), + // Node's Peer ID + core.peer_id().to_string().into(), + repl_network.clone().into(), + min_clock.to_string().into(), + max_clock.to_string().into(), + ]; + + // Append the message IDs + message.append(&mut message_ids); + + // Broadcast gossip request + let gossip_request = AppData::GossipsubBroadcastMessage { + topic: repl_network.into(), + message, + }; + + let _ = core.query_network(gossip_request).await; + } + + // Wait for a defined duration before the next sync + #[cfg(feature = "tokio-runtime")] + tokio::time::sleep(Duration::from_secs(Self::SYNC_WAIT_TIME)).await; + + #[cfg(feature = "async-std-runtime")] + async_std::task::sleep(Duration::from_secs(Self::SYNC_WAIT_TIME)).await; + } + } + + /// Synchronize incoming buffer image from a replica node with the local 
buffer image. + pub async fn sync_buffer_image( + &self, + mut core: Core, + repl_peer_id: PeerIdString, + repl_network: String, + lamports_clock_bound: (u64, u64), + replica_data_state: StringVector, + ) { + // Only when the clock of the last consumed buffer is greater than or equal to the higher + // clock sync bound, will the synchronizatio occur + let state = core.network_info.replication.state.lock().await; + if let Some(state) = state.get(&repl_network) { + if state.last_clock >= lamports_clock_bound.1 { + return; + } + } + + // Free `Core` + drop(state); + + // Convert replica data state into a set outside the mutex lock. + // Filter replica buffer too so it doesn't contain the data that we published. + // This is done using the messageId since by gossipsub, messageId = (Publishing + // peerId + Nonce) + let replica_buffer_state = replica_data_state + .into_iter() + .filter(|id| !id.contains(&core.peer_id().to_string())) + .map(|id| { + // Extract message Id + let msg_id = id.split(Core::ENTRY_DELIMITER).collect::>()[0]; + msg_id.into() + }) + .collect::>(); + + // Extract local buffer state and filter it while keeping the mutex lock duration + // minimal + let mut missing_msgs = { + let mut queue = self.queue.lock().await; + if let Some(local_state) = queue.get_mut(&repl_network) { + let local_buffer_state = local_state + .iter() + .filter(|data| { + data.lamport_clock >= lamports_clock_bound.0 + && data.lamport_clock <= lamports_clock_bound.1 + }) + .map(|data| data.message_id.clone()) + .collect::>(); + + // Extract messages missing from our local buffer + replica_buffer_state + .difference(&local_buffer_state) + .cloned() + .map(|id| id.into()) + .collect::() + } else { + return; // If the network state doesn't exist, exit early + } + }; + + if !missing_msgs.is_empty() { + // Prepare an RPC fetch request for missing messages + if let Ok(repl_peer_id) = repl_peer_id.parse::() { + let mut rpc_data: ByteVector = vec![ + Core::RPC_SYNC_PULL_FLAG.into(), // 
RPC sync pull flag + repl_network.clone().into(), // Replica network + ]; + + // Append the missing message ids to the request data + rpc_data.append(&mut missing_msgs); + + // Prepare an RPC to ask the replica node for missing data + let fetch_request = AppData::SendRpc { + keys: rpc_data, + peer: repl_peer_id, + }; + + // Send the fetch request + if let Ok(response) = core.query_network(fetch_request).await { + if let AppResponse::SendRpc(messages) = response { + // Parse response + let response = util::unmarshal_messages(messages); + + // Re-lock the mutex only for inserting new messages + let mut queue = self.queue.lock().await; + if let Some(local_state) = queue.get_mut(&repl_network) { + for missing_msg in response { + local_state.insert(missing_msg); + } + } + } + } + } + } + } + + /// Pull and return missing data requested by a replica node. + pub async fn pull_missing_data( + &self, + repl_network: String, + message_ids: &[Vec], + ) -> ByteVector { + // Fetch the local state from the queue with a minimal lock + let local_state = { + let queue = self.queue.lock().await; + queue.get(&repl_network).cloned() + }; + + // If the local state exists, process the message retrieval + if let Some(local_state) = local_state { + // Check if it a clone request + let requested_msgs = if message_ids[0].is_empty() { + // Retrieve all messages in buffer + local_state.iter().collect::>() + } else { + // Retrieve messages that match the requested message IDs + local_state + .iter() + .filter(|&data| message_ids.contains(&data.message_id.as_bytes().to_vec())) + .collect::>() + }; + + // Prepare the result buffer + let mut result = Vec::new(); + + for msg in requested_msgs { + // Serialize the `data` field (Vec) into a single string, separated by + // `$$` + let joined_data = msg.data.join(Core::DATA_DELIMITER); + + // Serialize individual fields, excluding `confirmations` + let mut entry = Vec::new(); + entry.extend_from_slice(joined_data.as_bytes()); + 
entry.extend_from_slice(Core::FIELD_DELIMITER.to_string().as_bytes()); + entry.extend_from_slice(msg.lamport_clock.to_string().as_bytes()); + entry.extend_from_slice(Core::FIELD_DELIMITER.to_string().as_bytes()); + entry.extend_from_slice(msg.outgoing_timestamp.to_string().as_bytes()); + entry.extend_from_slice(Core::FIELD_DELIMITER.to_string().as_bytes()); + entry.extend_from_slice(msg.incoming_timestamp.to_string().as_bytes()); + entry.extend_from_slice(Core::FIELD_DELIMITER.to_string().as_bytes()); + entry.extend_from_slice(msg.message_id.as_bytes()); + entry.extend_from_slice(Core::FIELD_DELIMITER.to_string().as_bytes()); + entry.extend_from_slice(msg.sender.to_base58().as_bytes()); + + // Append the entry to the result, separated by `ENTRY_DELIMITER` + if !result.is_empty() { + result.extend_from_slice(Core::ENTRY_DELIMITER.to_string().as_bytes()); + } + result.extend(entry); + } + + return vec![result]; + } + + // Default empty result if no local state is found. + Default::default() + } + + /// Replicate and populate buffer with replica's state. + pub async fn replicate_buffer( + &self, + mut core: Core, + repl_network: String, + replica_node: PeerId, + ) -> Result<(), NetworkError> { + // Send an RPC to the node to retreive it's buffer image + let rpc_data: ByteVector = vec![ + // RPC buffer copy flag. It is the samething as the sync pull flag with an empty + // message id vector. + Core::RPC_SYNC_PULL_FLAG.into(), + repl_network.clone().into(), // Replica network + vec![], // Empty vector indicating a total PULL + ]; + + // Prepare an RPC to ask the replica node for missing data + let fetch_request = AppData::SendRpc { + keys: rpc_data, + peer: replica_node, + }; + + // Try to query the replica node and insert data gotten into buffer + let mut queue = self.queue.lock().await; + match queue.get_mut(&repl_network) { + Some(local_state) => { + // Send the fetch request + match core.query_network(fetch_request).await? 
{ + AppResponse::SendRpc(messages) => { + // Parse response + let response = util::unmarshal_messages(messages); + // Insert into data buffer queue + for missing_msg in response { + local_state.insert(missing_msg); + } + + Ok(()) + }, + AppResponse::Error(err) => Err(err), + _ => Err(NetworkError::RpcDataFetchError), + } + }, + None => Err(NetworkError::MissingReplNetwork), + } + } +} + +#[cfg(test)] +mod tests { + + // use libp2p::dns::tokio; + use super::*; + + // Define custom ports for testing + const CUSTOM_TCP_PORT: Port = 49666; + const CUSTOM_UDP_PORT: Port = 49852; + + // Setup a node using default config + pub async fn setup_node(ports: (Port, Port)) -> Core { + let config = BootstrapConfig::default() + .with_tcp(ports.0) + .with_udp(ports.1); + + // Set up network + CoreBuilder::with_config(config).build().await.unwrap() + } + + #[test] + fn test_initialization_with_default_config() { + let buffer = ReplicaBufferQueue::new(ReplNetworkConfig::Default); + + match buffer.consistency_model() { + ConsistencyModel::Eventual => assert!(true), + _ => panic!("Consistency model not initialized correctly"), + } + } + + #[test] + fn test_initialization_with_custom_config() { + let config = ReplNetworkConfig::Custom { + queue_length: 200, + expiry_time: Some(120), + sync_wait_time: 10, + consistency_model: ConsistencyModel::Strong(ConsensusModel::All), + data_aging_period: 15, + }; + let buffer = ReplicaBufferQueue::new(config); + + match buffer.consistency_model() { + ConsistencyModel::Strong(ConsensusModel::All) => assert!(true), + _ => panic!("Consistency model not initialized correctly"), + } + + // Verify queue length + match buffer.config { + ReplNetworkConfig::Custom { queue_length, .. } => { + assert_eq!(queue_length, 200); + }, + _ => panic!("Queue length not initialized correctly"), + } + + // Verify expiry time + match buffer.config { + ReplNetworkConfig::Custom { expiry_time, .. 
} => { + assert_eq!(expiry_time, Some(120)); + }, + _ => panic!("Expiry time not initialized correctly"), + } + + // Verify sync wait time + match buffer.config { + ReplNetworkConfig::Custom { sync_wait_time, .. } => { + assert_eq!(sync_wait_time, 10); + }, + _ => panic!("Sync wait time not initialized correctly"), + } + + // Verify data aging period + match buffer.config { + ReplNetworkConfig::Custom { + data_aging_period, .. + } => { + assert_eq!(data_aging_period, 15); + }, + _ => panic!("Data aging period not initialized correctly"), + } + } + + // -- Buffer Queue Tests -- + + #[test] + fn test_buffer_overflow_expiry_behavior() { + tokio::runtime::Runtime::new().unwrap().block_on(async { + let expiry_period: u64 = 2; + + let config = ReplNetworkConfig::Custom { + queue_length: 4, + expiry_time: Some(expiry_period), // Set very short expiry for testing + sync_wait_time: 10, + consistency_model: ConsistencyModel::Eventual, + data_aging_period: 10, + }; + + let network = setup_node((CUSTOM_TCP_PORT, CUSTOM_UDP_PORT)).await; + let buffer = ReplicaBufferQueue::new(config); + + // Fill up buffer + for clock in 1..5 { + let data = ReplBufferData { + data: vec!["Data 1".into()], + lamport_clock: clock, + outgoing_timestamp: util::get_unix_timestamp(), + incoming_timestamp: util::get_unix_timestamp(), + message_id: "msg1".into(), + sender: PeerId::random(), + confirmations: None, + }; + + buffer + .push(network.clone(), "network1".into(), data.clone()) + .await; + } + + // Check that the first data lamport is 1 + assert_eq!( + buffer + .pop_front(network.clone(), "network1") + .await + .unwrap() + .lamport_clock, + 1 + ); + + tokio::time::sleep(std::time::Duration::from_secs(expiry_period)).await; // Wait for expiry + + // Buffer length should be 3 now + assert_eq!(buffer.queue.lock().await.get("network1").unwrap().len(), 3); + + // Fill up buffer + buffer + .push( + network.clone(), + "network1".into(), + ReplBufferData { + data: vec!["Data 1".into()], + lamport_clock: 
6, + outgoing_timestamp: util::get_unix_timestamp(), + incoming_timestamp: util::get_unix_timestamp(), + message_id: "msg1".into(), + sender: PeerId::random(), + confirmations: None, + }, + ) + .await; + + // Verify buffer length is now 4 + assert_eq!(buffer.queue.lock().await.get("network1").unwrap().len(), 4); + + // Overflow buffer + buffer + .push( + network.clone(), + "network1".into(), + ReplBufferData { + data: vec!["Data 1".into()], + lamport_clock: 42, + outgoing_timestamp: util::get_unix_timestamp(), + incoming_timestamp: util::get_unix_timestamp(), + message_id: "msg1".into(), + sender: PeerId::random(), + confirmations: None, + }, + ) + .await; + + // We expect that 6 is the first element and 42 is the second as they have not aged out + assert_eq!( + buffer + .pop_front(network.clone(), "network1") + .await + .unwrap() + .lamport_clock, + 6 + ); + assert_eq!( + buffer + .pop_front(network.clone(), "network1") + .await + .unwrap() + .lamport_clock, + 42 + ); + }); + } + + #[test] + fn test_buffer_overflow_no_expiry_behavior() { + tokio::runtime::Runtime::new().unwrap().block_on(async { + let config = ReplNetworkConfig::Custom { + queue_length: 4, + expiry_time: None, // Disable aging + sync_wait_time: 10, + consistency_model: ConsistencyModel::Eventual, + data_aging_period: 10, + }; + + let network = setup_node((15555, 6666)).await; + let buffer = ReplicaBufferQueue::new(config); + + for clock in 1..5 { + let data = ReplBufferData { + data: vec!["Data 1".into()], + lamport_clock: clock, + outgoing_timestamp: util::get_unix_timestamp(), + incoming_timestamp: util::get_unix_timestamp(), + message_id: "msg1".into(), + sender: PeerId::random(), + confirmations: None, + }; + + buffer + .push(network.clone(), "network1".into(), data.clone()) + .await; + } + + // Check that the first data lamport is 1 + assert_eq!( + buffer + .pop_front(network.clone(), "network1") + .await + .unwrap() + .lamport_clock, + 1 + ); + + buffer + .push( + network.clone(), + 
"network1".into(), + ReplBufferData { + data: vec!["Data 1".into()], + lamport_clock: 6, + outgoing_timestamp: util::get_unix_timestamp(), + incoming_timestamp: util::get_unix_timestamp(), + message_id: "msg1".into(), + sender: PeerId::random(), + confirmations: None, + }, + ) + .await; + + // Check that the data lamports are 2 and 3 as expected + assert_eq!( + buffer + .pop_front(network.clone(), "network1") + .await + .unwrap() + .lamport_clock, + 2 + ); + assert_eq!( + buffer + .pop_front(network.clone(), "network1") + .await + .unwrap() + .lamport_clock, + 3 + ); + }); + } + + #[test] + fn test_pop_from_empty_buffer() { + tokio::runtime::Runtime::new().unwrap().block_on(async { + let config = ReplNetworkConfig::Default; + let buffer = ReplicaBufferQueue::new(config); + + let network = setup_node((15551, 6661)).await; + + let result = buffer.pop_front(network.clone(), "network1").await; + assert_eq!(result.is_none(), true); + }); + } +} diff --git a/swarm_nl/src/core/sharding.rs b/swarm_nl/src/core/sharding.rs new file mode 100644 index 000000000..226f1afbd --- /dev/null +++ b/swarm_nl/src/core/sharding.rs @@ -0,0 +1,311 @@ +// Copyright 2024 Algorealm, Inc. +// Apache 2.0 License + +//! Module that contains important data structures to manage [`Sharding`] operations on the +//! network. +use super::*; +use async_trait::async_trait; +use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng}; + +/// Trait that interfaces with the storage layer of a node in a shard. It is important for handling +/// forwarded data requests. This is a mechanism to trap into the application storage layer to read +/// sharded data. +pub trait ShardStorage: Send + Sync + Clone { + fn fetch_data(&self, key: ByteVector) -> Option; +} + +/// Important data for the operation of the sharding protocol. +#[derive(Debug, Clone)] +pub struct ShardingInfo { + /// The id of the entire sharding network. + pub id: String, + /// Shard configuration. 
+ pub config: ShardingCfg, + /// The shards and the various nodes they contain. + pub state: Arc>>>, +} + +/// Important config for the operation of the sharding protocol. +#[derive(Debug, Clone)] +pub struct ShardingCfg { + /// Callback to handle explicit network requests. + pub callback: fn(RpcData) -> RpcData, +} + +/// Trait that specifies sharding logic and behaviour of shards. +#[async_trait] +pub trait Sharding +where + Self::Key: Send + Sync, + Self::ShardId: ToString + Send + Sync, +{ + /// The type of the shard key e.g hash, range etc. + type Key; + /// The identifier pointing to a specific group of shards. + type ShardId; + + /// Map a key to a shard. + fn locate_shard(&self, key: &Self::Key) -> Option; + + /// Join a shard network. + async fn join_network(&self, mut core: Core, shard_id: &Self::ShardId) -> NetworkResult<()> { + // Ensure the network sharding ID is set. + let network_shard_id: Vec = match &core.network_info.sharding.id { + id if !id.is_empty() => id.clone().into(), + _ => return Err(NetworkError::MissingShardingNetworkIdError), + }; + let network_sharding_id = String::from_utf8_lossy(&network_shard_id).to_string(); + + // Join the generic shard (gossip) network + let gossip_request = AppData::GossipsubJoinNetwork(network_sharding_id.clone()); + let _ = core.query_network(gossip_request).await?; + + // Update the local shard state + let mut shard_state = core.network_info.sharding.state.lock().await; + shard_state + .entry(shard_id.to_string()) + .or_insert_with(Default::default) + .push(core.peer_id()); + + // Free `Core` + drop(shard_state); + + // Join the shard network + let gossip_request = AppData::GossipsubJoinNetwork(shard_id.to_string()); + let _ = core.query_network(gossip_request).await?; + + // Inform the entire network about our decision + let message = vec![ + Core::SHARD_GOSSIP_JOIN_FLAG.as_bytes().to_vec(), // Flag for join event. + core.peer_id().to_string().into_bytes(), // Our peer ID. 
+ shard_id.to_string().into_bytes(), // Shard we're joining + ]; + + let gossip_request = AppData::GossipsubBroadcastMessage { + topic: network_sharding_id, + message, + }; + + // Gossip the join event to all nodes. + core.query_network(gossip_request).await?; + + Ok(()) + } + + /// Exit a shard network. + async fn exit_network(&self, mut core: Core, shard_id: &Self::ShardId) -> NetworkResult<()> { + // First, we remove ourself from the network state + let mut shard_state = core.network_info.sharding.state.lock().await; + let shard_entry = shard_state + .entry(shard_id.to_string()) + .or_insert(Default::default()); + + shard_entry.retain(|entry| entry != &core.peer_id()); + + // Release `core` + drop(shard_state); + + // Then, we make a broadcast + let message = vec![ + Core::SHARD_GOSSIP_EXIT_FLAG.to_string().into(), // Appropriate flag + core.peer_id().to_base58().into(), // Our peerId + shard_id.to_string().into(), // Network we're leaving + ]; + + // Prepare a gossip request + let gossip_request = AppData::GossipsubBroadcastMessage { + topic: core.network_info.sharding.id.clone(), + message, + }; + + let _ = core.query_network(gossip_request).await?; + + // Check if we're in any shard + let shard_state = core.network_info.sharding.state.lock().await; + if !shard_state + .iter() + .any(|(_, peers)| peers.contains(&core.peer_id())) + { + // Release `core` + drop(shard_state); + + // Leave the underlying sharding (gossip) network + let gossip_request = + AppData::GossipsubJoinNetwork(core.network_info.sharding.id.clone()); + core.query_network(gossip_request).await?; + } + + Ok(()) + } + + /// Send data to peers in the appropriate logical shard. It returns the data if the node is a + /// member of the shard after replicating it to fellow nodes in the same shard. + async fn shard( + &self, + mut core: Core, + key: &Self::Key, + data: ByteVector, + ) -> NetworkResult> { + // Locate the shard that would store the key. 
+ let shard_id = match self.locate_shard(key) { + Some(shard_id) => shard_id, + None => return Err(NetworkError::ShardNotFound), + }; + + // Retrieve the nodes in the logical shard. + let nodes = { + let shard_state = core.network_info.sharding.state.lock().await; + shard_state.get(&shard_id.to_string()).cloned() + }; + + // If no nodes exist for the shard, return an error. + let mut nodes = match nodes { + Some(nodes) => nodes, + None => return Err(NetworkError::MissingShardNodesError), + }; + + // Check if the current node is part of the shard. + if nodes.contains(&core.peer_id()) { + // Replicate the data to nodes in the shard. + let _ = core.replicate(data.clone(), &shard_id.to_string()).await; + return Ok(Some(data)); // Return the data to the caller. + } + + // Prepare the message for data forwarding. + let mut message = vec![ + Core::RPC_DATA_FORWARDING_FLAG.as_bytes().to_vec(), /* Flag to indicate data + * forwarding. */ + shard_id.to_string().into_bytes(), + ]; + message.extend(data); // Append the data payload. + + // Shuffle nodes so their order of query is randomized + let mut rng = StdRng::from_entropy(); + nodes.shuffle(&mut rng); + + // Attempt to forward the data to peers. + for peer in nodes { + let rpc_request = AppData::SendRpc { + keys: message.clone(), + peer: peer.clone(), + }; + + // Query the network and return success on the first successful response. + if core.query_network(rpc_request).await.is_ok() { + return Ok(None); // Forwarding succeeded. + } + } + + // If all peers fail, return an error. + Err(NetworkError::DataForwardingError) + } + + /// Fetch data from the shard network. + async fn fetch( + &self, + mut core: Core, + key: &Self::Key, + mut data: ByteVector, + ) -> NetworkResult> { + // Locate the shard that would store the key. + let shard_id = match self.locate_shard(key) { + Some(shard_id) => shard_id, + None => return Err(NetworkError::ShardingFailureError), + }; + + // Retrieve the nodes in the logical shard. 
+ let nodes = { + let shard_state = core.network_info.sharding.state.lock().await; + shard_state.get(&shard_id.to_string()).cloned() + }; + + // If no nodes exist for the shard, return an error. + let mut nodes = match nodes { + Some(nodes) => nodes, + None => return Err(NetworkError::ShardingFetchError), + }; + + // Check if the current node is part of the shard. + if nodes.contains(&core.peer_id()) { + // Return `None` + return Ok(None); + } + + // Shuffle the peers. + let mut rng = StdRng::from_entropy(); + nodes.shuffle(&mut rng); + + // Prepare an RPC to ask for the data from nodes in the shard. + let mut message = vec![ + Core::SHARD_RPC_REQUEST_FLAG.as_bytes().to_vec(), /* Flag to indicate shard data + * request */ + ]; + + message.append(&mut data); + + // Attempt to forward the data to peers. + for peer in nodes { + let rpc_request = AppData::SendRpc { + keys: message.clone(), + peer: peer.clone(), + }; + + // Query the network and return the response on the first successful response. 
+ if let Ok(response) = core.query_network(rpc_request).await { + if let AppResponse::SendRpc(data) = response { + return Ok(Some(data)); + } + } + } + + // Fetch Failed + Err(NetworkError::ShardingFetchError) + } +} + +#[cfg(test)] +mod tests { + + use super::*; + + #[test] + fn test_initial_shard_node_state() { + tokio::runtime::Runtime::new().unwrap().block_on(async { + // Initialize the shared state + let state = Arc::new(Mutex::new(HashMap::new())); + let config = ShardingCfg { + callback: |_rpc| RpcData::default(), + }; + let sharding_info = ShardingInfo { + id: "test-network".to_string(), + config, + state: state.clone(), + }; + + // Simulate a shard node initialization + let shard_id = "shard-1".to_string(); + + { + let mut shard_state = state.lock().await; + shard_state.insert(shard_id.clone(), vec![]); + } + + // Check the initial state + let shard_state = state.lock().await; + assert!( + shard_state.contains_key(&shard_id), + "Shard ID should exist in the state" + ); + assert!( + shard_state.get(&shard_id).unwrap().is_empty(), + "Shard state for shard-1 should be empty" + ); + + // Validate network ID + assert_eq!( + sharding_info.id, "test-network", + "Sharding network ID should be set correctly" + ); + }); + } +} diff --git a/swarm_nl/src/core/tests/layer_communication.rs b/swarm_nl/src/core/tests/layer_communication.rs index a5c61cd34..542849599 100644 --- a/swarm_nl/src/core/tests/layer_communication.rs +++ b/swarm_nl/src/core/tests/layer_communication.rs @@ -4,11 +4,20 @@ #![allow(unused_variables)] #![allow(unused_imports)] +use std::collections::HashMap; + +use crate::{ + core::{AppData, AppResponse, Core, CoreBuilder, DataQueue, NetworkError, NetworkEvent}, + setup::BootstrapConfig, + Port, DEFAULT_NETWORK_ID, +}; + use super::*; use libp2p::{ - core::{ConnectedPoint, Multiaddr}, + core::{transport::ListenerId, ConnectedPoint, Multiaddr}, PeerId, }; +use libp2p_identity::Keypair; /// Time to wait for the other peer to act, during integration 
tests (in seconds). pub const ITEST_WAIT_TIME: u64 = 7; @@ -407,12 +416,6 @@ fn gossipsub_info_works() { }); } -// Tests to add -// - on event buffer / queue: flood, .. -// - pop event off the queue and match it to that specific network event enum -// - test max_que is correctly handled -// - Test handler fns work and consume next event which could be nothing - // -- Event queue tests -- const MAX_QUEUE_ELEMENTS: usize = 300; @@ -575,7 +578,7 @@ fn rpc_fetch_works() { let multi_addr = format!("/ip4/127.0.0.1/tcp/{}", 49666); // Prepare fetch request - let fetch_request = AppData::FetchData { + let fetch_request = AppData::SendRpc { keys: fetch_key.clone(), peer: node_1_peer_id, }; @@ -583,7 +586,7 @@ fn rpc_fetch_works() { let stream_id = node_2.send_to_network(fetch_request).await.unwrap(); if let Ok(result) = node_2.recv_from_network(stream_id).await { - assert_eq!(AppResponse::FetchData(fetch_key), result); + assert_eq!(AppResponse::SendRpc(fetch_key), result); } }); } @@ -787,7 +790,7 @@ fn gossipsub_message_itest_works() { fn gossipsub_message_itest_works() { tokio::runtime::Runtime::new().unwrap().block_on(async { // Set up the second node that will dial - let (mut node_2, _) = setup_node_2((49885, 49889), (51887, 51887)).await; + let (mut node_2, _) = setup_node_2((49885, 49889), (51887, 51888)).await; // Join a network (subscribe to a topic) let gossip_request = AppData::GossipsubJoinNetwork(GOSSIP_NETWORK.to_string()); @@ -798,7 +801,7 @@ fn gossipsub_message_itest_works() { // Prepare broadcast query let gossip_request = AppData::GossipsubBroadcastMessage { topic: GOSSIP_NETWORK.to_string(), - message: vec!["Apple".to_string(), "nike".to_string()], + message: vec!["Apple".to_string().into(), "nike".to_string().into()], }; if let Ok(result) = node_2.query_network(gossip_request).await { diff --git a/swarm_nl/src/core/tests/mod.rs b/swarm_nl/src/core/tests/mod.rs index 8b1c97616..16da251e9 100644 --- a/swarm_nl/src/core/tests/mod.rs +++ 
b/swarm_nl/src/core/tests/mod.rs @@ -1,3 +1,4 @@ -use super::*; +// use super::*; mod layer_communication; mod node_behaviour; +mod replication; diff --git a/swarm_nl/src/core/tests/node_behaviour.rs b/swarm_nl/src/core/tests/node_behaviour.rs index 6ab6ef023..11f33f0c0 100644 --- a/swarm_nl/src/core/tests/node_behaviour.rs +++ b/swarm_nl/src/core/tests/node_behaviour.rs @@ -1,213 +1,213 @@ -//! Node setup and behaviour tests. - -#![allow(dead_code)] -#![allow(unused_variables)] -#![allow(unused_imports)] - -use super::*; -use futures::TryFutureExt; -use ini::Ini; -use std::fs; -use std::fs::File; -use std::net::{Ipv4Addr, Ipv6Addr}; - -// Set up a default node helper -pub fn setup_core_builder() -> CoreBuilder { - let config = BootstrapConfig::default().with_tcp(49158).with_udp(49159); - - // Return default network core builder - CoreBuilder::with_config(config) -} - -// Define custom ports for testing -const CUSTOM_TCP_PORT: Port = 49666; -const CUSTOM_UDP_PORT: Port = 49852; - -// Used to test saving keypair to file -fn create_test_ini_file(file_path: &str) { - let mut config = Ini::new(); - config - .with_section(Some("ports")) - .set("tcp", CUSTOM_TCP_PORT.to_string()) - .set("udp", CUSTOM_UDP_PORT.to_string()); - - config.with_section(Some("bootstrap")).set( - "boot_nodes", - "[12D3KooWGfbL6ZNGWqS11MoptH2A7DB1DG6u85FhXBUPXPVkVVRq:/ip4/192.168.1.205/tcp/1509]", - ); - // Write config to a new INI file - config.write_to_file(file_path).unwrap_or_default(); -} - -#[cfg(feature = "tokio-runtime")] -#[test] -fn node_default_behavior_works() { - // Build a node with the default network id - let default_node = setup_core_builder(); - - // Assert that the default network id is '/swarmnl/1.0' - assert_eq!(default_node.network_id, DEFAULT_NETWORK_ID); - - // Default transport is TCP/QUIC - assert_eq!( - default_node.transport, - TransportOpts::TcpQuic { - tcp_config: TcpConfig::Default - } - ); - - // Default keep alive duration is 60 seconds - 
assert_eq!(default_node.keep_alive_duration, 60); - - // Default listen on is 0:0:0:0 - assert_eq!( - default_node.ip_address, - IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)) - ); - - let config = BootstrapConfig::default(); - let default_state = (); - - // Return default network core builder - let builder = CoreBuilder::with_config(config); - - // Default tcp/udp port is MIN_PORT and MAX_PORT - assert_eq!(builder.tcp_udp_port, (MIN_PORT, MAX_PORT)); -} - -#[test] -fn node_custom_setup_works() { - // Build a node with the default network id - let default_node = setup_core_builder(); - - // Custom node configuration - let custom_network_id = "/custom-protocol/1.0".to_string(); - let custom_transport = TransportOpts::TcpQuic { - tcp_config: TcpConfig::Custom { - ttl: 10, - nodelay: true, - backlog: 10, - }, - }; - let custom_keep_alive_duration = 20; - let custom_ip_address = IpAddr::V6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1)); - - // Pass in the custom node configuration and assert it works as expected - let custom_node = default_node - .with_network_id(custom_network_id.clone()) - .with_transports(custom_transport.clone()) - .with_idle_connection_timeout(custom_keep_alive_duration.clone()) - .listen_on(custom_ip_address.clone()); - - // Assert that the custom network id is '/custom/protocol/1.0' - assert_eq!(custom_node.network_id(), custom_network_id); - - // Assert that the custom transport is 'TcpQuic' - assert_eq!(custom_node.transport, custom_transport); - - // Assert that the custom keep alive duration is 20 - assert_eq!(custom_node.keep_alive_duration, custom_keep_alive_duration); -} - -#[test] -fn node_custom_behavior_with_network_id_works() { - // Setup a node with the default config builder - let custom_builder = setup_core_builder(); - - // Configure builder with custom protocol and assert it works as expected - let custom_protocol: &str = "/custom-protocol/1.0"; - let custom_builder = custom_builder.with_network_id(custom_protocol.to_string()); - - // Cannot be 
less than MIN_NETWORK_ID_LENGTH - assert_eq!( - custom_builder.network_id().len() >= MIN_NETWORK_ID_LENGTH.into(), - true - ); - - // Must start with a forward slash - assert!(custom_builder.network_id().starts_with("/")); - - // Assert that the custom network id is '/custom/protocol/1.0' - assert_eq!(custom_builder.network_id(), custom_protocol.to_string()); -} - -#[test] -#[should_panic(expected = "Could not parse provided network id")] -fn node_custom_behavior_with_network_id_fails() { - // Build a node with the default network id - let custom_builder = setup_core_builder(); - - // Pass in an invalid network ID: network ID length is less than MIN_NETWORK_ID_LENGTH - let invalid_protocol_1 = "/1.0".to_string(); - let custom_builder = custom_builder.with_network_id(invalid_protocol_1); - - // Pass in an invalid network ID: network ID must start with a forward slash - let invalid_protocol_2 = "1.0".to_string(); - custom_builder.with_network_id(invalid_protocol_2); -} - -#[cfg(feature = "tokio-runtime")] -#[test] -fn node_save_keypair_offline_works_tokio() { - // Build a node with the default network id - let default_node = setup_core_builder(); - - // Use tokio runtime to test async function - let result = tokio::runtime::Runtime::new() - .unwrap() - .block_on(async { default_node.build().await.unwrap() }); - - // Create a saved_keys.ini file - let file_path_1 = "saved_keys.ini"; - create_test_ini_file(file_path_1); - - // Save the keypair to existing file - let saved_1 = result.save_keypair_offline(&file_path_1); - - // Assert that the keypair was saved successfully - assert_eq!(saved_1, true); - - // Test if it works for a file name that does not exist - let file_path_2 = "test.ini"; - let saved_2 = result.save_keypair_offline(file_path_2); - assert_eq!(saved_2, true); - - // Clean up - fs::remove_file(file_path_1).unwrap_or_default(); - fs::remove_file(file_path_2).unwrap_or_default(); -} - -#[cfg(feature = "async-std-runtime")] -#[test] -fn 
node_save_keypair_offline_works_async_std() { - // Build a node with the default network id - let default_node = setup_core_builder(); - - // Use tokio runtime to test async function - let result = async_std::task::block_on( - default_node - .build() - .unwrap_or_else(|_| panic!("Could not build node")), - ); - - // Make a saved_keys.ini file - let file_path_1 = "saved_keys.ini"; - create_test_ini_file(file_path_1); - - // Save the keypair to existing file - let saved_1 = result.save_keypair_offline(file_path_1); - - // Assert that the keypair was saved successfully - assert_eq!(saved_1, true); - - // Now test if it works for a file name that does not exist - let file_path_2 = "test.txt"; - let saved_2 = result.save_keypair_offline(file_path_2); - - // Assert that the keypair was saved successfully - assert_eq!(saved_2, true); - - // Clean up - fs::remove_file(file_path_1).unwrap_or_default(); - fs::remove_file(file_path_2).unwrap_or_default(); -} +// //! Node setup and behaviour tests. 
+ +// #![allow(dead_code)] +// #![allow(unused_variables)] +// #![allow(unused_imports)] + +// use super::*; +// use futures::TryFutureExt; +// use ini::Ini; +// use std::fs; +// use std::fs::File; +// use std::net::{Ipv4Addr, Ipv6Addr}; + +// // Set up a default node helper +// pub fn setup_core_builder() -> CoreBuilder { +// let config = BootstrapConfig::default().with_tcp(49158).with_udp(49159); + +// // Return default network core builder +// CoreBuilder::with_config(config) +// } + +// // Define custom ports for testing +// const CUSTOM_TCP_PORT: Port = 49666; +// const CUSTOM_UDP_PORT: Port = 49852; + +// // Used to test saving keypair to file +// fn create_test_ini_file(file_path: &str) { +// let mut config = Ini::new(); +// config +// .with_section(Some("ports")) +// .set("tcp", CUSTOM_TCP_PORT.to_string()) +// .set("udp", CUSTOM_UDP_PORT.to_string()); + +// config.with_section(Some("bootstrap")).set( +// "boot_nodes", +// "[12D3KooWGfbL6ZNGWqS11MoptH2A7DB1DG6u85FhXBUPXPVkVVRq:/ip4/192.168.1.205/tcp/1509]", +// ); +// // Write config to a new INI file +// config.write_to_file(file_path).unwrap_or_default(); +// } + +// #[cfg(feature = "tokio-runtime")] +// #[test] +// fn node_default_behavior_works() { +// // Build a node with the default network id +// let default_node = setup_core_builder(); + +// // Assert that the default network id is '/swarmnl/1.0' +// assert_eq!(default_node.network_id, DEFAULT_NETWORK_ID); + +// // Default transport is TCP/QUIC +// assert_eq!( +// default_node.transport, +// TransportOpts::TcpQuic { +// tcp_config: TcpConfig::Default +// } +// ); + +// // Default keep alive duration is 60 seconds +// assert_eq!(default_node.keep_alive_duration, 60); + +// // Default listen on is 0:0:0:0 +// assert_eq!( +// default_node.ip_address, +// IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)) +// ); + +// let config = BootstrapConfig::default(); +// let default_state = (); + +// // Return default network core builder +// let builder = 
CoreBuilder::with_config(config); + +// // Default tcp/udp port is MIN_PORT and MAX_PORT +// assert_eq!(builder.tcp_udp_port, (MIN_PORT, MAX_PORT)); +// } + +// #[test] +// fn node_custom_setup_works() { +// // Build a node with the default network id +// let default_node = setup_core_builder(); + +// // Custom node configuration +// let custom_network_id = "/custom-protocol/1.0".to_string(); +// let custom_transport = TransportOpts::TcpQuic { +// tcp_config: TcpConfig::Custom { +// ttl: 10, +// nodelay: true, +// backlog: 10, +// }, +// }; +// let custom_keep_alive_duration = 20; +// let custom_ip_address = IpAddr::V6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1)); + +// // Pass in the custom node configuration and assert it works as expected +// let custom_node = default_node +// .with_network_id(custom_network_id.clone()) +// .with_transports(custom_transport.clone()) +// .with_idle_connection_timeout(custom_keep_alive_duration.clone()) +// .listen_on(custom_ip_address.clone()); + +// // Assert that the custom network id is '/custom/protocol/1.0' +// assert_eq!(custom_node.network_id(), custom_network_id); + +// // Assert that the custom transport is 'TcpQuic' +// assert_eq!(custom_node.transport, custom_transport); + +// // Assert that the custom keep alive duration is 20 +// assert_eq!(custom_node.keep_alive_duration, custom_keep_alive_duration); +// } + +// #[test] +// fn node_custom_behavior_with_network_id_works() { +// // Setup a node with the default config builder +// let custom_builder = setup_core_builder(); + +// // Configure builder with custom protocol and assert it works as expected +// let custom_protocol: &str = "/custom-protocol/1.0"; +// let custom_builder = custom_builder.with_network_id(custom_protocol.to_string()); + +// // Cannot be less than MIN_NETWORK_ID_LENGTH +// assert_eq!( +// custom_builder.network_id().len() >= MIN_NETWORK_ID_LENGTH.into(), +// true +// ); + +// // Must start with a forward slash +// 
assert!(custom_builder.network_id().starts_with("/")); + +// // Assert that the custom network id is '/custom/protocol/1.0' +// assert_eq!(custom_builder.network_id(), custom_protocol.to_string()); +// } + +// #[test] +// #[should_panic(expected = "Could not parse provided network id")] +// fn node_custom_behavior_with_network_id_fails() { +// // Build a node with the default network id +// let custom_builder = setup_core_builder(); + +// // Pass in an invalid network ID: network ID length is less than MIN_NETWORK_ID_LENGTH +// let invalid_protocol_1 = "/1.0".to_string(); +// let custom_builder = custom_builder.with_network_id(invalid_protocol_1); + +// // Pass in an invalid network ID: network ID must start with a forward slash +// let invalid_protocol_2 = "1.0".to_string(); +// custom_builder.with_network_id(invalid_protocol_2); +// } + +// #[cfg(feature = "tokio-runtime")] +// #[test] +// fn node_save_keypair_offline_works_tokio() { +// // Build a node with the default network id +// let default_node = setup_core_builder(); + +// // Use tokio runtime to test async function +// let result = tokio::runtime::Runtime::new() +// .unwrap() +// .block_on(async { default_node.build().await.unwrap() }); + +// // Create a saved_keys.ini file +// let file_path_1 = "saved_keys.ini"; +// create_test_ini_file(file_path_1); + +// // Save the keypair to existing file +// let saved_1 = result.save_keypair_offline(&file_path_1); + +// // Assert that the keypair was saved successfully +// assert_eq!(saved_1, true); + +// // Test if it works for a file name that does not exist +// let file_path_2 = "test.ini"; +// let saved_2 = result.save_keypair_offline(file_path_2); +// assert_eq!(saved_2, true); + +// // Clean up +// fs::remove_file(file_path_1).unwrap_or_default(); +// fs::remove_file(file_path_2).unwrap_or_default(); +// } + +// #[cfg(feature = "async-std-runtime")] +// #[test] +// fn node_save_keypair_offline_works_async_std() { +// // Build a node with the default network 
id +// let default_node = setup_core_builder(); + +// // Use tokio runtime to test async function +// let result = async_std::task::block_on( +// default_node +// .build() +// .unwrap_or_else(|_| panic!("Could not build node")), +// ); + +// // Make a saved_keys.ini file +// let file_path_1 = "saved_keys.ini"; +// create_test_ini_file(file_path_1); + +// // Save the keypair to existing file +// let saved_1 = result.save_keypair_offline(file_path_1); + +// // Assert that the keypair was saved successfully +// assert_eq!(saved_1, true); + +// // Now test if it works for a file name that does not exist +// let file_path_2 = "test.txt"; +// let saved_2 = result.save_keypair_offline(file_path_2); + +// // Assert that the keypair was saved successfully +// assert_eq!(saved_2, true); + +// // Clean up +// fs::remove_file(file_path_1).unwrap_or_default(); +// fs::remove_file(file_path_2).unwrap_or_default(); +// } diff --git a/swarm_nl/src/core/tests/replication.rs b/swarm_nl/src/core/tests/replication.rs new file mode 100644 index 000000000..a839a01c7 --- /dev/null +++ b/swarm_nl/src/core/tests/replication.rs @@ -0,0 +1,1294 @@ +// Replication tests + +use libp2p::{gossipsub::MessageId, PeerId}; +use libp2p_identity::Keypair; +use std::{collections::HashMap, io, time::Duration}; +use futures::{ + channel::mpsc::{self, Receiver, Sender}, + select, SinkExt, StreamExt, +}; +use crate::{ + core::{ + gossipsub_cfg::GossipsubConfig, + replication::{ConsensusModel, ConsistencyModel, ReplNetworkConfig}, + Core, CoreBuilder, NetworkEvent, ReplicaBufferQueue, RpcConfig, + }, + setup::BootstrapConfig, + MultiaddrString, PeerIdString, Port, +}; + +/// The constant that represents the id of the replica network. +pub const REPL_NETWORK_ID: &'static str = "replica_xx"; + +/// Handle incoming RPCs. +fn rpc_incoming_message_handler(data: Vec>) -> Vec> { + // Just return incoming data + data +} + +/// Handle gossiping. 
+fn gossipsub_filter_fn( + propagation_source: PeerId, + message_id: MessageId, + source: Option, + topic: String, + data: Vec, +) -> bool { + true +} + +/// Create a deterministic node. +async fn setup_node( + ports: (Port, Port), + deterministic_protobuf: &[u8], + boot_nodes: HashMap, + consistency_model: ConsistencyModel, +) -> Core { + // Configure the node deterministically so we can connect to it + let mut protobuf = &mut deterministic_protobuf.to_owned()[..]; + + let config = BootstrapConfig::default() + .generate_keypair_from_protobuf("ed25519", &mut protobuf) + .with_tcp(ports.0) + .with_udp(ports.1) + // configure bootnodes, so we can connect to our sister nodes + .with_bootnodes(boot_nodes); + + // Set up network + let mut builder = CoreBuilder::with_config(config) + .with_rpc(RpcConfig::Default, rpc_incoming_message_handler) + .with_gossipsub(GossipsubConfig::Default, gossipsub_filter_fn); + + // Configure node for replication, we will be using a strong consistency model here + let repl_config = ReplNetworkConfig::Custom { + queue_length: 150, + expiry_time: Some(10), + sync_wait_time: 5, + consistency_model, + data_aging_period: 2, + }; + + builder.with_replication(repl_config).build().await.unwrap() +} + +// - joining and exit + +#[tokio::test] +async fn repl_itest_join_and_exit_works() { + // Node 1 keypair + let node_1_keypair: [u8; 68] = [ + 8, 1, 18, 64, 34, 116, 25, 74, 122, 174, 130, 2, 98, 221, 17, 247, 176, 102, 205, 3, 27, + 202, 193, 27, 6, 104, 216, 158, 235, 38, 141, 58, 64, 81, 157, 155, 36, 193, 50, 147, 85, + 72, 64, 174, 65, 132, 232, 78, 231, 224, 88, 38, 55, 78, 178, 65, 42, 97, 39, 152, 42, 164, + 148, 159, 36, 170, 109, 178, + ]; + + // Node 2 Keypair + let node_2_keypair: [u8; 68] = [ + 8, 1, 18, 64, 37, 37, 86, 103, 79, 48, 103, 83, 170, 172, 131, 160, 15, 138, 237, 128, 114, + 144, 239, 7, 37, 6, 217, 25, 202, 210, 55, 89, 55, 93, 0, 153, 82, 226, 1, 54, 240, 36, + 110, 110, 173, 119, 143, 79, 44, 82, 126, 121, 247, 154, 252, 
215, 43, 21, 101, 109, 235, + 10, 127, 128, 52, 52, 68, 31, + ]; + + // Node 3 Keypair + let node_3_keypair: [u8; 68] = [ + 8, 1, 18, 64, 211, 172, 68, 234, 95, 121, 188, 130, 107, 113, 212, 215, 211, 189, 219, 190, + 137, 91, 250, 222, 34, 152, 190, 117, 139, 199, 250, 5, 33, 65, 14, 180, 214, 5, 151, 109, + 184, 106, 73, 186, 126, 52, 59, 220, 170, 158, 195, 249, 110, 74, 222, 161, 88, 194, 187, + 112, 95, 131, 113, 251, 106, 94, 61, 177, + ]; + + // Get Peer Id's + let peer_id_1 = Keypair::from_protobuf_encoding(&node_1_keypair) + .unwrap() + .public() + .to_peer_id(); + + let peer_id_2 = Keypair::from_protobuf_encoding(&node_2_keypair) + .unwrap() + .public() + .to_peer_id(); + + let peer_id_3 = Keypair::from_protobuf_encoding(&node_3_keypair) + .unwrap() + .public() + .to_peer_id(); + + // Ports + let ports_1: (Port, Port) = (49155, 55103); + let ports_2: (Port, Port) = (49153, 55101); + let ports_3: (Port, Port) = (49154, 55102); + + // Setup node 1 + let task_1 = tokio::task::spawn(async move { + // Bootnodes + let mut bootnodes = HashMap::new(); + + bootnodes.insert( + peer_id_2.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_2.0), + ); + + bootnodes.insert( + peer_id_3.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_3.0), + ); + + let mut node = setup_node( + ports_1, + &node_1_keypair[..], + bootnodes, + ConsistencyModel::Strong(ConsensusModel::All), + ) + .await; + + // join replica network works + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + // sleep for 3 seconds + tokio::time::sleep(Duration::from_secs(3)).await; + + // exit replica network works + let _ = node.leave_repl_network(REPL_NETWORK_ID.into()).await; + }); + + // setup node 2 + let task_2 = tokio::task::spawn(async move { + // Bootnodes + let mut bootnodes = HashMap::new(); + + bootnodes.insert( + peer_id_1.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_1.0), + ); + + bootnodes.insert( + peer_id_3.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", 
ports_3.0), + ); + + let mut node = setup_node( + ports_2, + &node_2_keypair[..], + bootnodes, + ConsistencyModel::Strong(ConsensusModel::All), + ) + .await; + + // join replica network works + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + // sleep for 3 seconds + tokio::time::sleep(Duration::from_secs(3)).await; + + // exit replica network works + let _ = node.leave_repl_network(REPL_NETWORK_ID.into()).await; + }); + + // setup node 3 + let task_3 = tokio::task::spawn(async move { + // Bootnodes + let mut bootnodes = HashMap::new(); + + bootnodes.insert( + peer_id_1.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_1.0), + ); + + bootnodes.insert( + peer_id_2.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_2.0), + ); + + let mut node = setup_node( + ports_3, + &node_3_keypair[..], + bootnodes, + ConsistencyModel::Strong(ConsensusModel::All), + ) + .await; + + // join replica network works + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + // assert that 2 nodes have joined + assert_eq!(node.replica_peers(REPL_NETWORK_ID.into()).await.len(), 2); + + // after sleeping for 5 secons we expect there to be no more nodes in the replication + // network + tokio::time::sleep(Duration::from_secs(5)).await; + + // assert that 2 nodes have left + assert_eq!(node.replica_peers(REPL_NETWORK_ID.into()).await.len(), 0); + }); + + for task in vec![task_1, task_2, task_3] { + task.await.unwrap(); + } +} + +#[tokio::test] +async fn repl_itest_fully_replicate_node() { + // Node 1 keypair + let node_1_keypair: [u8; 68] = [ + 8, 1, 18, 64, 34, 116, 25, 74, 122, 174, 130, 2, 98, 221, 17, 247, 176, 102, 205, 3, 27, + 202, 193, 27, 6, 104, 216, 158, 235, 38, 141, 58, 64, 81, 157, 155, 36, 193, 50, 147, 85, + 72, 64, 174, 65, 132, 232, 78, 231, 224, 88, 38, 55, 78, 178, 65, 42, 97, 39, 152, 42, 164, + 148, 159, 36, 170, 109, 178, + ]; + // Node 2 Keypair + let node_2_keypair: [u8; 68] = [ + 8, 1, 18, 64, 37, 37, 86, 103, 79, 48, 103, 83, 170, 
172, 131, 160, 15, 138, 237, 128, 114, + 144, 239, 7, 37, 6, 217, 25, 202, 210, 55, 89, 55, 93, 0, 153, 82, 226, 1, 54, 240, 36, + 110, 110, 173, 119, 143, 79, 44, 82, 126, 121, 247, 154, 252, 215, 43, 21, 101, 109, 235, + 10, 127, 128, 52, 52, 68, 31, + ]; + // Node 3 Keypair + let node_3_keypair: [u8; 68] = [ + 8, 1, 18, 64, 211, 172, 68, 234, 95, 121, 188, 130, 107, 113, 212, 215, 211, 189, 219, 190, + 137, 91, 250, 222, 34, 152, 190, 117, 139, 199, 250, 5, 33, 65, 14, 180, 214, 5, 151, 109, + 184, 106, 73, 186, 126, 52, 59, 220, 170, 158, 195, 249, 110, 74, 222, 161, 88, 194, 187, + 112, 95, 131, 113, 251, 106, 94, 61, 177, + ]; + // Get Peer Id's + let peer_id_1 = Keypair::from_protobuf_encoding(&node_1_keypair) + .unwrap() + .public() + .to_peer_id(); + let peer_id_2 = Keypair::from_protobuf_encoding(&node_2_keypair) + .unwrap() + .public() + .to_peer_id(); + let peer_id_3 = Keypair::from_protobuf_encoding(&node_3_keypair) + .unwrap() + .public() + .to_peer_id(); + + // Ports + let ports_1: (Port, Port) = (49255, 55203); + let ports_2: (Port, Port) = (49253, 55201); + let ports_3: (Port, Port) = (49254, 55202); + + // Setup node 1 + let task_1 = tokio::task::spawn(async move { + // Bootnodes + let mut bootnodes = HashMap::new(); + bootnodes.insert( + peer_id_2.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_2.0), + ); + bootnodes.insert( + peer_id_3.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_3.0), + ); + let mut node = setup_node( + ports_1, + &node_1_keypair[..], + bootnodes, + ConsistencyModel::Strong(ConsensusModel::All), + ) + .await; + + // Join replica network works + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + // Send to replica node 2 + node.replicate(vec!["Apples".into()], &REPL_NETWORK_ID) + .await + .unwrap(); + node.replicate(vec!["Papayas".into()], &REPL_NETWORK_ID) + .await + .unwrap(); + + // Keep node running + tokio::time::sleep(Duration::from_secs(10)).await; + }); + + // Setup node 2 + let task_2 = 
tokio::task::spawn(async move { + // Bootnodes + let mut bootnodes = HashMap::new(); + bootnodes.insert( + peer_id_1.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_1.0), + ); + bootnodes.insert( + peer_id_3.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_3.0), + ); + let mut node = setup_node( + ports_2, + &node_2_keypair[..], + bootnodes, + ConsistencyModel::Strong(ConsensusModel::All), + ) + .await; + + // Join replica network works + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + // Send to replica node 1 + node.replicate(vec!["Oranges".into()], &REPL_NETWORK_ID) + .await + .unwrap(); + node.replicate(vec!["Kiwis".into()], &REPL_NETWORK_ID) + .await + .unwrap(); + + // Keep node running + tokio::time::sleep(Duration::from_secs(10)).await; + }); + + // Setup node 3 + let task_3 = tokio::task::spawn(async move { + // Bootnodes + let mut bootnodes = HashMap::new(); + bootnodes.insert( + peer_id_1.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_1.0), + ); + bootnodes.insert( + peer_id_2.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_2.0), + ); + let mut node = setup_node( + ports_3, + &node_3_keypair[..], + bootnodes, + ConsistencyModel::Strong(ConsensusModel::All), + ) + .await; + + // Sleep to wait for nodes 1 and 2 to replicate data + tokio::time::sleep(Duration::from_secs(20)).await; + + // Join replica network works + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + // Assert that this node (node 3) has nothing in its buffer + assert_eq!(node.consume_repl_data(REPL_NETWORK_ID.into()).await, None); + + // Replicate the data from node 1's buffer (node 1 is the node that published node 2's data) + node.replicate_buffer(REPL_NETWORK_ID.into(), peer_id_1) + .await + .unwrap(); + + // Assert that this node (node 3) has the data from node 2 + assert_eq!( + node.consume_repl_data(REPL_NETWORK_ID.into()) + .await + .unwrap() + .data, + vec!["Oranges".to_string()] + ); + assert_eq!( + 
node.consume_repl_data(REPL_NETWORK_ID.into()) + .await + .unwrap() + .data, + vec!["Kiwis".to_string()] + ); + + // Replicate the data from node 2's buffer (node 2 is the node that published node 1's data) + node.replicate_buffer(REPL_NETWORK_ID.into(), peer_id_2) + .await + .unwrap(); + + // Assert that this node (node 3) has the data from node 1 + assert_eq!( + node.consume_repl_data(REPL_NETWORK_ID.into()) + .await + .unwrap() + .data, + vec!["Apples".to_string()] + ); + assert_eq!( + node.consume_repl_data(REPL_NETWORK_ID.into()) + .await + .unwrap() + .data, + vec!["Papayas".to_string()] + ); + }); + + for task in vec![task_1, task_2, task_3] { + task.await.unwrap(); + } +} + +// Nodes need to come to consensus before it's replicated to the primary buffer. +// If there are two peers in the network there is no consensus to be reached; the node just puts it in the +// primary buffer. If there are three peers, they need to reach consensus. We need to test for +// Strong(All) and Strong(MinPeers) for this. 
+ +mod strong_consistency { + + use super::*; + use crate::core::replication::ReplBufferData; + + #[tokio::test] + async fn two_nodes_confirmations_with_all_consistency_model() { + // Node 1 keypair + let node_1_keypair: [u8; 68] = [ + 8, 1, 18, 64, 34, 116, 25, 74, 122, 174, 130, 2, 98, 221, 17, 247, 176, 102, 205, 3, + 27, 202, 193, 27, 6, 104, 216, 158, 235, 38, 141, 58, 64, 81, 157, 155, 36, 193, 50, + 147, 85, 72, 64, 174, 65, 132, 232, 78, 231, 224, 88, 38, 55, 78, 178, 65, 42, 97, 39, + 152, 42, 164, 148, 159, 36, 170, 109, 178, + ]; + // Node 2 Keypair + let node_2_keypair: [u8; 68] = [ + 8, 1, 18, 64, 37, 37, 86, 103, 79, 48, 103, 83, 170, 172, 131, 160, 15, 138, 237, 128, + 114, 144, 239, 7, 37, 6, 217, 25, 202, 210, 55, 89, 55, 93, 0, 153, 82, 226, 1, 54, + 240, 36, 110, 110, 173, 119, 143, 79, 44, 82, 126, 121, 247, 154, 252, 215, 43, 21, + 101, 109, 235, 10, 127, 128, 52, 52, 68, 31, + ]; + + // Get Peer Id's + let peer_id_1 = Keypair::from_protobuf_encoding(&node_1_keypair) + .unwrap() + .public() + .to_peer_id(); + let peer_id_2 = Keypair::from_protobuf_encoding(&node_2_keypair) + .unwrap() + .public() + .to_peer_id(); + + // Ports + let ports_1: (Port, Port) = (49355, 55303); + let ports_2: (Port, Port) = (49353, 55301); + + // Setup node 1 + let task_1 = tokio::task::spawn(async move { + // Bootnodes + let mut bootnodes = HashMap::new(); + bootnodes.insert( + peer_id_2.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_2.0), + ); + let mut node = setup_node( + ports_1, + &node_1_keypair[..], + bootnodes, + ConsistencyModel::Strong(ConsensusModel::All), + ) + .await; + + // Join replica network works + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + // Send to replica node 2 + node.replicate(vec!["Apples".into()], &REPL_NETWORK_ID) + .await + .unwrap(); + node.replicate(vec!["Papayas".into()], &REPL_NETWORK_ID) + .await + .unwrap(); + + // Keep node running + tokio::time::sleep(Duration::from_secs(10)).await; + }); + + // Setup 
node 2 + let task_2 = tokio::task::spawn(async move { + // Bootnodes + let mut bootnodes = HashMap::new(); + bootnodes.insert( + peer_id_1.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_1.0), + ); + + let mut node = setup_node( + ports_2, + &node_2_keypair[..], + bootnodes, + ConsistencyModel::Strong(ConsensusModel::All), + ) + .await; + + // Join replica network works + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + // Sleep for 4 seconds + tokio::time::sleep(Duration::from_secs(4)).await; + + let first_repl_data = node + .consume_repl_data(REPL_NETWORK_ID.into()) + .await + .unwrap(); + let second_repl_data = node + .consume_repl_data(REPL_NETWORK_ID.into()) + .await + .unwrap(); + + assert_eq!(first_repl_data.confirmations, Some(1)); + assert_eq!(first_repl_data.data, vec!["Apples".to_string()]); + + assert_eq!(second_repl_data.confirmations, Some(1)); + assert_eq!(second_repl_data.data, vec!["Papayas".to_string()]); + }); + + for task in vec![task_1, task_2] { + task.await.unwrap(); + } + } + + #[tokio::test] + async fn multi_nodes_confirmations_with_all_consistency_model() { + // Node 1 keypair + let node_1_keypair: [u8; 68] = [ + 8, 1, 18, 64, 34, 116, 25, 74, 122, 174, 130, 2, 98, 221, 17, 247, 176, 102, 205, 3, + 27, 202, 193, 27, 6, 104, 216, 158, 235, 38, 141, 58, 64, 81, 157, 155, 36, 193, 50, + 147, 85, 72, 64, 174, 65, 132, 232, 78, 231, 224, 88, 38, 55, 78, 178, 65, 42, 97, 39, + 152, 42, 164, 148, 159, 36, 170, 109, 178, + ]; + // Node 2 Keypair + let node_2_keypair: [u8; 68] = [ + 8, 1, 18, 64, 37, 37, 86, 103, 79, 48, 103, 83, 170, 172, 131, 160, 15, 138, 237, 128, + 114, 144, 239, 7, 37, 6, 217, 25, 202, 210, 55, 89, 55, 93, 0, 153, 82, 226, 1, 54, + 240, 36, 110, 110, 173, 119, 143, 79, 44, 82, 126, 121, 247, 154, 252, 215, 43, 21, + 101, 109, 235, 10, 127, 128, 52, 52, 68, 31, + ]; + // Node 3 Keypair + let node_3_keypair: [u8; 68] = [ + 8, 1, 18, 64, 211, 172, 68, 234, 95, 121, 188, 130, 107, 113, 212, 215, 211, 189, 
219, + 190, 137, 91, 250, 222, 34, 152, 190, 117, 139, 199, 250, 5, 33, 65, 14, 180, 214, 5, + 151, 109, 184, 106, 73, 186, 126, 52, 59, 220, 170, 158, 195, 249, 110, 74, 222, 161, + 88, 194, 187, 112, 95, 131, 113, 251, 106, 94, 61, 177, + ]; + // Get Peer Id's + let peer_id_1 = Keypair::from_protobuf_encoding(&node_1_keypair) + .unwrap() + .public() + .to_peer_id(); + let peer_id_2 = Keypair::from_protobuf_encoding(&node_2_keypair) + .unwrap() + .public() + .to_peer_id(); + let peer_id_3 = Keypair::from_protobuf_encoding(&node_3_keypair) + .unwrap() + .public() + .to_peer_id(); + + // Ports + let ports_1: (Port, Port) = (49455, 55403); + let ports_2: (Port, Port) = (49453, 55401); + let ports_3: (Port, Port) = (49454, 55402); + + // Setup node 1 + let task_1 = tokio::task::spawn(async move { + // Bootnodes + let mut bootnodes = HashMap::new(); + bootnodes.insert( + peer_id_2.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_2.0), + ); + bootnodes.insert( + peer_id_3.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_3.0), + ); + let mut node = setup_node( + ports_1, + &node_1_keypair[..], + bootnodes, + ConsistencyModel::Strong(ConsensusModel::All), + ) + .await; + + // Join replica network works + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + // Send to replica node 2 + node.replicate(vec!["Apples".into()], &REPL_NETWORK_ID) + .await + .unwrap(); + node.replicate(vec!["Papayas".into()], &REPL_NETWORK_ID) + .await + .unwrap(); + + // Keep node running + tokio::time::sleep(Duration::from_secs(10)).await; + }); + + // Setup node 2 + let task_2 = tokio::task::spawn(async move { + // Bootnodes + let mut bootnodes = HashMap::new(); + bootnodes.insert( + peer_id_1.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_1.0), + ); + bootnodes.insert( + peer_id_3.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_3.0), + ); + let mut node = setup_node( + ports_2, + &node_2_keypair[..], + bootnodes, + 
ConsistencyModel::Strong(ConsensusModel::All), + ) + .await; + + // Join replica network works + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + // Keep node running + tokio::time::sleep(Duration::from_secs(10)).await; + }); + + // Setup node 3 + let task_3 = tokio::task::spawn(async move { + // Bootnodes + let mut bootnodes = HashMap::new(); + bootnodes.insert( + peer_id_1.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_1.0), + ); + bootnodes.insert( + peer_id_2.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_2.0), + ); + let mut node = setup_node( + ports_3, + &node_3_keypair[..], + bootnodes, + ConsistencyModel::Strong(ConsensusModel::All), + ) + .await; + + // Join replica network works + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + // Sleep for 7 seconds to give time for confirmation + tokio::time::sleep(Duration::from_secs(20)).await; + + let first_repl_data = node + .consume_repl_data(REPL_NETWORK_ID.into()) + .await + .unwrap(); + let second_repl_data = node + .consume_repl_data(REPL_NETWORK_ID.into()) + .await + .unwrap(); + + // We expect two confirmations + assert_eq!(first_repl_data.confirmations, Some(2)); + assert_eq!(first_repl_data.data, vec!["Apples".to_string()]); + + assert_eq!(second_repl_data.confirmations, Some(2)); + assert_eq!(second_repl_data.data, vec!["Papayas".to_string()]); + }); + + for task in vec![task_1, task_2, task_3] { + task.await.unwrap(); + } + } + + #[tokio::test] + async fn confirmations_with_min_peer_consistency_model() { + // Node 1 keypair + let node_1_keypair: [u8; 68] = [ + 8, 1, 18, 64, 34, 116, 25, 74, 122, 174, 130, 2, 98, 221, 17, 247, 176, 102, 205, 3, + 27, 202, 193, 27, 6, 104, 216, 158, 235, 38, 141, 58, 64, 81, 157, 155, 36, 193, 50, + 147, 85, 72, 64, 174, 65, 132, 232, 78, 231, 224, 88, 38, 55, 78, 178, 65, 42, 97, 39, + 152, 42, 164, 148, 159, 36, 170, 109, 178, + ]; + // Node 2 Keypair + let node_2_keypair: [u8; 68] = [ + 8, 1, 18, 64, 37, 37, 86, 103, 79, 
48, 103, 83, 170, 172, 131, 160, 15, 138, 237, 128, + 114, 144, 239, 7, 37, 6, 217, 25, 202, 210, 55, 89, 55, 93, 0, 153, 82, 226, 1, 54, + 240, 36, 110, 110, 173, 119, 143, 79, 44, 82, 126, 121, 247, 154, 252, 215, 43, 21, + 101, 109, 235, 10, 127, 128, 52, 52, 68, 31, + ]; + // Node 3 Keypair + let node_3_keypair: [u8; 68] = [ + 8, 1, 18, 64, 211, 172, 68, 234, 95, 121, 188, 130, 107, 113, 212, 215, 211, 189, 219, + 190, 137, 91, 250, 222, 34, 152, 190, 117, 139, 199, 250, 5, 33, 65, 14, 180, 214, 5, + 151, 109, 184, 106, 73, 186, 126, 52, 59, 220, 170, 158, 195, 249, 110, 74, 222, 161, + 88, 194, 187, 112, 95, 131, 113, 251, 106, 94, 61, 177, + ]; + // Node 4 KeyPair + let node_4_keypair: [u8; 68] = [ + 8, 1, 18, 64, 4, 6, 168, 164, 84, 243, 246, 30, 251, 170, 237, 166, 76, 239, 85, 63, + 96, 207, 13, 230, 24, 186, 45, 148, 16, 36, 6, 74, 232, 181, 26, 196, 101, 194, 118, + 113, 133, 5, 144, 101, 96, 114, 239, 73, 204, 94, 74, 169, 59, 128, 188, 17, 110, 183, + 40, 91, 25, 152, 219, 30, 26, 130, 145, 160, + ]; + + // Get Peer Id's + let peer_id_1 = Keypair::from_protobuf_encoding(&node_1_keypair) + .unwrap() + .public() + .to_peer_id(); + let peer_id_2 = Keypair::from_protobuf_encoding(&node_2_keypair) + .unwrap() + .public() + .to_peer_id(); + let peer_id_3 = Keypair::from_protobuf_encoding(&node_3_keypair) + .unwrap() + .public() + .to_peer_id(); + + let peer_id_4 = Keypair::from_protobuf_encoding(&node_4_keypair) + .unwrap() + .public() + .to_peer_id(); + + // Ports (note: ports_4 must not collide with ports_1, which listens on 49555) + let ports_1: (Port, Port) = (49555, 55503); + let ports_2: (Port, Port) = (49553, 55501); + let ports_3: (Port, Port) = (49554, 55502); + let ports_4: (Port, Port) = (49556, 55504); + + // Setup node 1 + let task_1 = tokio::task::spawn(async move { + // Bootnodes + let mut bootnodes = HashMap::new(); + bootnodes.insert( + peer_id_2.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_2.0), + ); + bootnodes.insert( + peer_id_3.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_3.0), + ); + 
bootnodes.insert( + peer_id_4.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_4.0), + ); + + // Setup node with consistency consistency model + let mut node = setup_node( + ports_1, + &node_1_keypair[..], + bootnodes, + ConsistencyModel::Strong(ConsensusModel::MinPeers(2)), + ) + .await; + + // Join replica network works + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + // Send to replica node 2 + node.replicate(vec!["Apples".into()], &REPL_NETWORK_ID) + .await + .unwrap(); + + // Keep node running + tokio::time::sleep(Duration::from_secs(10)).await; + }); + + // Setup node 2 + let task_2 = tokio::task::spawn(async move { + // Bootnodes + let mut bootnodes = HashMap::new(); + bootnodes.insert( + peer_id_1.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_1.0), + ); + bootnodes.insert( + peer_id_3.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_3.0), + ); + bootnodes.insert( + peer_id_4.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_4.0), + ); + // Setup node with consistency consistency model + let mut node = setup_node( + ports_2, + &node_2_keypair[..], + bootnodes, + ConsistencyModel::Strong(ConsensusModel::MinPeers(2)), + ) + .await; + + // Join replica network works + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + // Keep node running + tokio::time::sleep(Duration::from_secs(10)).await; + }); + + // Setup node 3 + let task_3 = tokio::task::spawn(async move { + // Bootnodes + let mut bootnodes = HashMap::new(); + bootnodes.insert( + peer_id_1.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_1.0), + ); + bootnodes.insert( + peer_id_2.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_2.0), + ); + bootnodes.insert( + peer_id_4.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_4.0), + ); + + // Setup node with consistency consistency model + let mut node = setup_node( + ports_3, + &node_3_keypair[..], + bootnodes, + ConsistencyModel::Strong(ConsensusModel::MinPeers(2)), + ) + .await; + + // 
Join replica network works + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + tokio::time::sleep(Duration::from_secs(10)).await; + }); + + // Setup node 4 + let task_4 = tokio::task::spawn(async move { + // Bootnodes + let mut bootnodes = HashMap::new(); + bootnodes.insert( + peer_id_1.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_1.0), + ); + bootnodes.insert( + peer_id_2.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_2.0), + ); + bootnodes.insert( + peer_id_3.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_3.0), + ); + + // Setup node with consistency model + let mut node = setup_node( + ports_4, + &node_4_keypair[..], + bootnodes, + ConsistencyModel::Strong(ConsensusModel::MinPeers(2)), + ) + .await; + + // Join replica network + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + loop { + while let Some(data) = node.consume_repl_data(REPL_NETWORK_ID.into()).await { + assert_eq!(data.confirmations, Some(2)); + assert_eq!(data.data, vec!["Apples".to_string()]); + return; + } + + tokio::time::sleep(Duration::from_secs(3)).await; + } + }); + + // task_4 holds this test's assertions, so it must be awaited too + for task in vec![task_1, task_2, task_3, task_4] { + task.await.unwrap(); + } + } +} + +mod eventual_consistency { + use super::*; + use libp2p_identity::Keypair; + + #[tokio::test] + async fn new_node_join_and_sync_works() { + // Node 1 keypair + let node_1_keypair: [u8; 68] = [ + 8, 1, 18, 64, 34, 116, 25, 74, 122, 174, 130, 2, 98, 221, 17, 247, 176, 102, 205, 3, + 27, 202, 193, 27, 6, 104, 216, 158, 235, 38, 141, 58, 64, 81, 157, 155, 36, 193, 50, + 147, 85, 72, 64, 174, 65, 132, 232, 78, 231, 224, 88, 38, 55, 78, 178, 65, 42, 97, 39, + 152, 42, 164, 148, 159, 36, 170, 109, 178, + ]; + // Node 2 Keypair + let node_2_keypair: [u8; 68] = [ + 8, 1, 18, 64, 37, 37, 86, 103, 79, 48, 103, 83, 170, 172, 131, 160, 15, 138, 237, 128, + 114, 144, 239, 7, 37, 6, 217, 25, 202, 210, 55, 89, 55, 93, 0, 153, 82, 226, 1, 54, + 240, 36, 110, 110, 173, 119, 143, 79, 44, 82, 126, 121, 
247, 154, 252, 215, 43, 21, + 101, 109, 235, 10, 127, 128, 52, 52, 68, 31, + ]; + + // Node 3 Keypair + let node_3_keypair: [u8; 68] = [ + 8, 1, 18, 64, 211, 172, 68, 234, 95, 121, 188, 130, 107, 113, 212, 215, 211, 189, 219, + 190, 137, 91, 250, 222, 34, 152, 190, 117, 139, 199, 250, 5, 33, 65, 14, 180, 214, 5, + 151, 109, 184, 106, 73, 186, 126, 52, 59, 220, 170, 158, 195, 249, 110, 74, 222, 161, + 88, 194, 187, 112, 95, 131, 113, 251, 106, 94, 61, 177, + ]; + + // Get Peer Id's + let peer_id_1 = Keypair::from_protobuf_encoding(&node_1_keypair) + .unwrap() + .public() + .to_peer_id(); + + let peer_id_2 = Keypair::from_protobuf_encoding(&node_2_keypair) + .unwrap() + .public() + .to_peer_id(); + + let peer_id_3 = Keypair::from_protobuf_encoding(&node_3_keypair) + .unwrap() + .public() + .to_peer_id(); + + // Ports + let ports_1: (Port, Port) = (49655, 55603); + let ports_2: (Port, Port) = (49653, 55606); + let ports_3: (Port, Port) = (49654, 55602); + + // Setup node 1 + let task_1 = tokio::task::spawn(async move { + // Bootnodes + let mut bootnodes = HashMap::new(); + bootnodes.insert( + peer_id_2.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_2.0), + ); + bootnodes.insert( + peer_id_3.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_3.0), + ); + let mut node = setup_node( + ports_1, + &node_1_keypair[..], + bootnodes, + ConsistencyModel::Eventual, + ) + .await; + + // Join replica network works + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + // Send to replica node 2 + node.replicate(vec!["Apples".into()], &REPL_NETWORK_ID) + .await + .unwrap(); + node.replicate(vec!["Papayas".into()], &REPL_NETWORK_ID) + .await + .unwrap(); + + // Keep node running + tokio::time::sleep(Duration::from_secs(10)).await; + }); + + // Setup node 2 + let task_2 = tokio::task::spawn(async move { + // Bootnodes + let mut bootnodes = HashMap::new(); + bootnodes.insert( + peer_id_1.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_1.0), + ); + 
bootnodes.insert( + peer_id_3.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_3.0), + ); + + let mut node = setup_node( + ports_2, + &node_2_keypair[..], + bootnodes, + ConsistencyModel::Eventual, + ) + .await; + + // Join replica network works + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + // Send to replica node 1 + node.replicate(vec!["Oranges".into()], &REPL_NETWORK_ID) + .await + .unwrap(); + node.replicate(vec!["Kiwis".into()], &REPL_NETWORK_ID) + .await + .unwrap(); + + // Keep node running + tokio::time::sleep(Duration::from_secs(10)).await; + }); + + // Setup node 3 + let task_3 = tokio::task::spawn(async move { + // Bootnodes + let mut bootnodes = HashMap::new(); + bootnodes.insert( + peer_id_1.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_1.0), + ); + bootnodes.insert( + peer_id_2.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_2.0), + ); + let mut node = setup_node( + ports_3, + &node_3_keypair[..], + bootnodes, + ConsistencyModel::Eventual, + ) + .await; + + // Sleep to wait for nodes 1 and 2 to replicate data + tokio::time::sleep(Duration::from_secs(20)).await; + + // Join replica network works + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + // Sleep to allow network wide synchronization + tokio::time::sleep(Duration::from_secs(10)).await; + + let mut number_of_messages = 0; + + while let Some(_) = node.consume_repl_data(REPL_NETWORK_ID.into()).await { + number_of_messages += 1; + } + assert_eq!(number_of_messages, 4); + }); + + for task in vec![task_1, task_2, task_3] { + task.await.unwrap(); + } + } + + #[tokio::test] + async fn test_lamports_clock_ordering(){ + // Node 1 keypair + let node_1_keypair: [u8; 68] = [ + 8, 1, 18, 64, 34, 116, 25, 74, 122, 174, 130, 2, 98, 221, 17, 247, 176, 102, 205, 3, + 27, 202, 193, 27, 6, 104, 216, 158, 235, 38, 141, 58, 64, 81, 157, 155, 36, 193, 50, + 147, 85, 72, 64, 174, 65, 132, 232, 78, 231, 224, 88, 38, 55, 78, 178, 65, 42, 97, 39, + 152, 42, 164, 
148, 159, 36, 170, 109, 178, + ]; + // Node 2 Keypair + let node_2_keypair: [u8; 68] = [ + 8, 1, 18, 64, 37, 37, 86, 103, 79, 48, 103, 83, 170, 172, 131, 160, 15, 138, 237, 128, + 114, 144, 239, 7, 37, 6, 217, 25, 202, 210, 55, 89, 55, 93, 0, 153, 82, 226, 1, 54, + 240, 36, 110, 110, 173, 119, 143, 79, 44, 82, 126, 121, 247, 154, 252, 215, 43, 21, + 101, 109, 235, 10, 127, 128, 52, 52, 68, 31, + ]; + // Node 3 Keypair + let node_3_keypair: [u8; 68] = [ + 8, 1, 18, 64, 211, 172, 68, 234, 95, 121, 188, 130, 107, 113, 212, 215, 211, 189, 219, + 190, 137, 91, 250, 222, 34, 152, 190, 117, 139, 199, 250, 5, 33, 65, 14, 180, 214, 5, + 151, 109, 184, 106, 73, 186, 126, 52, 59, 220, 170, 158, 195, 249, 110, 74, 222, 161, + 88, 194, 187, 112, 95, 131, 113, 251, 106, 94, 61, 177, + ]; + // Node 4 KeyPair + let node_4_keypair: [u8; 68] = [ + 8, 1, 18, 64, 4, 6, 168, 164, 84, 243, 246, 30, 251, 170, 237, 166, 76, 239, 85, 63, + 96, 207, 13, 230, 24, 186, 45, 148, 16, 36, 6, 74, 232, 181, 26, 196, 101, 194, 118, + 113, 133, 5, 144, 101, 96, 114, 239, 73, 204, 94, 74, 169, 59, 128, 188, 17, 110, 183, + 40, 91, 25, 152, 219, 30, 26, 130, 145, 160, + ]; + + // Get Peer Id's + let peer_id_1 = Keypair::from_protobuf_encoding(&node_1_keypair) + .unwrap() + .public() + .to_peer_id(); + let peer_id_2 = Keypair::from_protobuf_encoding(&node_2_keypair) + .unwrap() + .public() + .to_peer_id(); + let peer_id_3 = Keypair::from_protobuf_encoding(&node_3_keypair) + .unwrap() + .public() + .to_peer_id(); + + let peer_id_4 = Keypair::from_protobuf_encoding(&node_4_keypair) + .unwrap() + .public() + .to_peer_id(); + + // Ports + let ports_1: (Port, Port) = (49752, 55703); + let ports_2: (Port, Port) = (49753, 55701); + let ports_3: (Port, Port) = (49754, 55702); + let ports_4: (Port, Port) = (49755, 55704); + + // Setup async channel to send network state between tasks. + // NOTE(review): generic argument reconstructed from the (String, lamport_clock) pairs + // pushed into buffer_state below — confirm the lamport clock type is u64. + let (mut tx, mut rx) = mpsc::channel::<Vec<(String, u64)>>(5); + + // Setup node 1 + let task_1 = tokio::task::spawn(async move { + // Bootnodes + 
let mut bootnodes = HashMap::new(); + bootnodes.insert( + peer_id_2.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_2.0), + ); + bootnodes.insert( + peer_id_3.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_3.0), + ); + bootnodes.insert( + peer_id_4.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_4.0), + ); + + // Setup node with consistency consistency model + let mut node = setup_node( + ports_1, + &node_1_keypair[..], + bootnodes, + ConsistencyModel::Eventual, + ) + .await; + + // Join replica network works + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + // Send to replica node 2 + node.replicate(vec!["Apples".into()], &REPL_NETWORK_ID) + .await + .unwrap(); + + // Send to replica node 2 + node.replicate(vec!["Papayas".into()], &REPL_NETWORK_ID) + .await + .unwrap(); + + // Keep node running + tokio::time::sleep(Duration::from_secs(15)).await; + }); + + // Setup node 2 + let task_2 = tokio::task::spawn(async move { + // Bootnodes + let mut bootnodes = HashMap::new(); + bootnodes.insert( + peer_id_1.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_1.0), + ); + bootnodes.insert( + peer_id_3.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_3.0), + ); + bootnodes.insert( + peer_id_4.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_4.0), + ); + // Setup node with consistency consistency model + let mut node = setup_node( + ports_2, + &node_2_keypair[..], + bootnodes, + ConsistencyModel::Eventual, + ) + .await; + + // Join replica network works + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + // Publish messages + node.replicate(vec!["Oranges".into()], &REPL_NETWORK_ID) + .await + .unwrap(); + node.replicate(vec!["Kiwis".into()], &REPL_NETWORK_ID) + .await + .unwrap(); + + // Keep node running + tokio::time::sleep(Duration::from_secs(15)).await; + }); + + // Setup node 3 + let task_3 = tokio::task::spawn(async move { + // Bootnodes + let mut bootnodes = HashMap::new(); + bootnodes.insert( + 
peer_id_1.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_1.0), + ); + bootnodes.insert( + peer_id_2.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_2.0), + ); + bootnodes.insert( + peer_id_4.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_4.0), + ); + + // Setup node with consistency consistency model + let mut node = setup_node( + ports_3, + &node_3_keypair[..], + bootnodes, + ConsistencyModel::Eventual, + ) + .await; + + // Join replica network works + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + // Sleep to give time for node 1 and 2 to publish data to the network + tokio::time::sleep(Duration::from_secs(20)).await; + + // Get replica buffer state + let mut buffer_state = Vec::new(); + while let Some(data) = node.consume_repl_data(REPL_NETWORK_ID.into()).await { + buffer_state.push((data.data[0].clone(), data.lamport_clock)); + } + + // Send buffer state to node 4 over mpsc channel + let _ = tx.send(buffer_state).await; + + // Keep node alive for 10 seconds so the producing end does not close + tokio::time::sleep(Duration::from_secs(10)).await; + }); + + // Setup node 4 + let task_4 = tokio::task::spawn(async move { + // Bootnodes + let mut bootnodes = HashMap::new(); + bootnodes.insert( + peer_id_1.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_1.0), + ); + bootnodes.insert( + peer_id_2.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_2.0), + ); + bootnodes.insert( + peer_id_3.to_base58(), + format!("/ip4/127.0.0.1/tcp/{}", ports_3.0), + ); + + // Setup node with consistency consistency model + let mut node = setup_node( + ports_4, + &node_4_keypair[..], + bootnodes, + ConsistencyModel::Eventual, + ) + .await; + + // Join replica network + let _ = node.join_repl_network(REPL_NETWORK_ID.into()).await; + + // We wait for 25 seconds so that node 1, 2 and 3 operations are completed + tokio::time::sleep(Duration::from_secs(25)).await; + + // Get local buffer state + let mut local_buffer_state = Vec::new(); + while 
let Some(data) = node.consume_repl_data(REPL_NETWORK_ID.into()).await { + local_buffer_state.push((data.data[0].clone(), data.lamport_clock)); + } + + // Get node 3's incoming buffer state + let incoming_buffer_state = rx.next().await.unwrap(); + + // Compare both buffer states and the ordering of their data + for (local_data, incoming_data) in local_buffer_state.iter().zip(incoming_buffer_state.iter()) { + assert_eq!(local_data.0, incoming_data.0); + assert_eq!(local_data.1, incoming_data.1); + } + }); + + for task in vec![task_1, task_2, task_3] { + task.await.unwrap(); + } + } +} diff --git a/swarm_nl/src/prelude.rs b/swarm_nl/src/prelude.rs index ca17a1d0d..8764330f4 100644 --- a/swarm_nl/src/prelude.rs +++ b/swarm_nl/src/prelude.rs @@ -1,5 +1,5 @@ -// Copyright 2024 Algorealm, Inc. -// Apache 2.0 License +//! Copyright 2024 Algorealm, Inc. +//! Apache 2.0 License //! Types and traits that are used throughout SwarmNL. @@ -13,15 +13,6 @@ pub static DEFAULT_IP_ADDRESS: Ipv4Addr = Ipv4Addr::new(0, 0, 0, 0); /// Default amount of time to keep a connection alive. pub static DEFAULT_KEEP_ALIVE_DURATION: Seconds = 60; -/// Struct respresenting data for replication configuration -#[derive(Clone, Default, Debug)] -pub struct ReplConfigData { - /// Replica Network key - pub network_key: String, - /// Replica nodes described by their addresses - pub nodes: HashMap, -} - /// Library error type containing all custom errors that could be encountered. #[derive(Error, Debug)] pub enum SwarmNlError { diff --git a/swarm_nl/src/setup.rs b/swarm_nl/src/setup.rs index 9235ba911..d1a33bc8f 100644 --- a/swarm_nl/src/setup.rs +++ b/swarm_nl/src/setup.rs @@ -1,11 +1,9 @@ -// Copyright 2024 Algorealm, Inc. -// Apache 2.0 License +//! Copyright 2024 Algorealm, Inc. +//! Apache 2.0 License //! Data structures and functions to setup a node and configure it for networking. 
#![doc = include_str!("../doc/setup/NodeSetup.md")] -use std::{collections::HashMap, rc::Rc}; - use crate::core::gossipsub_cfg::Blacklist; pub use crate::prelude::*; pub use libp2p_identity::{rsa::Keypair as RsaKeypair, KeyType, Keypair, PeerId}; @@ -26,8 +24,6 @@ pub struct BootstrapConfig { boot_nodes: Nodes, /// Blacklisted peers blacklist: Blacklist, - /// Configuration data for replication - replication_cfg: Rc>, } impl BootstrapConfig { @@ -55,8 +51,6 @@ impl BootstrapConfig { boot_nodes: Default::default(), // List of blacklisted peers blacklist: Default::default(), - // List containing replication nodes - replication_cfg: Default::default(), } } @@ -96,22 +90,6 @@ impl BootstrapConfig { } } - /// Configure nodes for replication and add them to bootnodes for early connection - pub fn with_replication(self, cfg_data: Vec) -> Self { - // A connection request must be sent to the replica nodes on startup, so we will add it to - // our list of bootnodes - let bootnodes: HashMap = cfg_data - .iter() - .flat_map(|cfg| cfg.nodes.iter().map(|(k, v)| (k.clone(), v.clone()))) - .collect(); - let node = self.with_bootnodes(bootnodes); - - Self { - replication_cfg: Rc::new(cfg_data), - ..node - } - } - /// Generate a Cryptographic Keypair for node identity creation and message signing. /// /// An RSA keypair cannot be generated on-the-fly. It has to be generated from a `.pk8` file. @@ -202,11 +180,6 @@ impl BootstrapConfig { pub fn blacklist(&self) -> Blacklist { self.blacklist.clone() } - - /// Return the configuration data for replication - pub fn repl_cfg(&self) -> Rc> { - self.replication_cfg.clone() - } } /// [`Default`] implementation for [`BootstrapConfig`]. diff --git a/swarm_nl/src/testing_guide.rs b/swarm_nl/src/testing_guide.rs index d05d95db8..3a57d4d29 100644 --- a/swarm_nl/src/testing_guide.rs +++ b/swarm_nl/src/testing_guide.rs @@ -165,3 +165,12 @@ //! ```bash //! 
cargo test gossipsub_message_itest_works --features=test-broadcast-node --features=tokio-runtime -- --nocapture //! ``` +//! +//! ## Replication tests +//! +//! For replication tests, we setup the nodes as separate threads to form replication networks. For +//! testing joining and exiting functions, run: +//! +//! ```bash +//! cargo test repl_itest_join_and_exit_works +//! ``` diff --git a/swarm_nl/src/util.rs b/swarm_nl/src/util.rs index 395f2222d..403416276 100644 --- a/swarm_nl/src/util.rs +++ b/swarm_nl/src/util.rs @@ -1,14 +1,23 @@ -// Copyright 2024 Algorealm, Inc. -// Apache 2.0 License +//! Copyright 2024 Algorealm, Inc. +//! Apache 2.0 License //! Utility helper functions for reading from and writing to `.ini` config files. -use crate::{prelude::*, setup::BootstrapConfig}; +use crate::{ + core::{replication::ReplBufferData, ByteVector, Core, StringVector}, + prelude::*, + setup::BootstrapConfig, +}; use base58::FromBase58; use ini::Ini; use libp2p_identity::PeerId; use rand::{distributions::Alphanumeric, Rng}; -use std::{collections::HashMap, path::Path, str::FromStr, time::{SystemTime, UNIX_EPOCH}}; +use std::{ + collections::HashMap, + path::Path, + str::FromStr, + time::{SystemTime, UNIX_EPOCH}, +}; /// Read an INI file containing bootstrap config information. 
pub fn read_ini_file(file_path: &str) -> SwarmNlResult { @@ -62,18 +71,9 @@ pub fn read_ini_file(file_path: &str) -> SwarmNlResult { Default::default() }; - // Now, read static replication config data if any - let replica_nodes = if let Some(section) = config.section(Some("repl")) { - // Get the configured replica nodes - parse_replication_data(section.get("replica_nodes").unwrap_or_default()) - } else { - Default::default() - }; - Ok(BootstrapConfig::new() .generate_keypair_from_protobuf(key_type, &mut serialized_keypair) .with_bootnodes(boot_nodes) - .with_replication(replica_nodes) .with_blacklist(blacklist) .with_tcp(tcp_port) .with_udp(udp_port)) @@ -129,62 +129,177 @@ fn string_to_hashmap(input: &str) -> HashMap { }) } -/// Parse replica nodes specified in the `bootstrap_config.ini` config file -fn parse_replication_data(input: &str) -> Vec { - let mut result = Vec::new(); +/// Convert a peer ID string to [`PeerId`]. +pub fn string_to_peer_id(peer_id_string: &str) -> Option { + PeerId::from_bytes(&peer_id_string.from_base58().unwrap_or_default()).ok() +} - // Remove brackets and split by '@' - let data = input.trim_matches(|c| c == '[' || c == ']').split('@'); +/// Generate a random string of variable length +pub fn generate_random_string(length: usize) -> String { + let mut rng = rand::thread_rng(); + (0..length) + .map(|_| rng.sample(Alphanumeric) as char) + .collect() +} - for section in data { - if section.is_empty() { - continue; - } +/// Unmarshall data received as RPC during the execution of the eventual consistency algorithm to +/// fill in missing messages in the node's buffer +pub fn unmarshal_messages(data: Vec>) -> Vec { + let mut result = Vec::new(); - // Split outer identifier and the rest - if let Some((outer_id, inner_data)) = section.split_once(':') { - let mut inner_map = HashMap::new(); + for entry in data { + let serialized = String::from_utf8_lossy(&entry).to_string(); + let entries: Vec<&str> = 
serialized.split(Core::ENTRY_DELIMITER).collect(); - // Split each key-value pair - for entry in inner_data.trim_matches(|c| c == '[' || c == ']').split(',') { - if let Some((key, value)) = entry.trim().split_once(':') { - inner_map.insert(key.to_string(), value.to_string()); - } + for entry in entries { + let fields: Vec<&str> = entry.split(Core::FIELD_DELIMITER).collect(); + if fields.len() < 6 { + continue; // Skip malformed entries } - // Create outer map - let cfg = ReplConfigData { - network_key: outer_id.trim().to_string(), - nodes: inner_map, - }; - result.push(cfg); + let data_field: Vec = fields[0] + .split(Core::DATA_DELIMITER) + .map(|s| s.to_string()) + .collect(); + let lamport_clock = fields[1].parse().unwrap_or(0); + let outgoing_timestamp = fields[2].parse().unwrap_or(0); + let incoming_timestamp = fields[3].parse().unwrap_or(0); + let message_id = fields[4].to_string(); + let sender = fields[5]; + + // Parse peerId + if let Ok(peer_id) = sender.parse::() { + result.push(ReplBufferData { + data: data_field, + lamport_clock, + outgoing_timestamp, + incoming_timestamp, + message_id, + sender: peer_id, + confirmations: None, // Since eventual consistency + }); + } } } result } -/// Convert a peer ID string to [`PeerId`]. 
-pub fn string_to_peer_id(peer_id_string: &str) -> Option { - PeerId::from_bytes(&peer_id_string.from_base58().unwrap_or_default()).ok() +/// Get unix timestamp in seconds +pub fn get_unix_timestamp() -> Seconds { + // Get the current system time + let now = SystemTime::now(); + // Calculate the duration since the Unix epoch + let duration_since_epoch = now.duration_since(UNIX_EPOCH).expect("Time went backwards"); + // Return the Unix timestamp in seconds + duration_since_epoch.as_secs() } -/// Generate a random string of variable length -pub fn generate_random_string(length: usize) -> String { - let mut rng = rand::thread_rng(); - (0..length) - .map(|_| rng.sample(Alphanumeric) as char) +/// Convert a [ByteVector] to a [StringVector]. +pub fn byte_vec_to_string_vec(input: ByteVector) -> StringVector { + input + .into_iter() + .map(|vec| String::from_utf8(vec).unwrap_or_else(|_| String::from("Invalid UTF-8"))) .collect() } -// Get unix timestamp as string -pub fn get_unix_timestamp() -> Seconds { - // Get the current system time - let now = SystemTime::now(); - // Calculate the duration since the Unix epoch - let duration_since_epoch = now.duration_since(UNIX_EPOCH).expect("Time went backwards"); - // Return the Unix timestamp in seconds as a string - duration_since_epoch.as_secs() +/// Convert a [StringVector] to a [ByteVector] +pub fn string_vec_to_byte_vec(input: StringVector) -> ByteVector { + input.into_iter().map(|s| s.into_bytes()).collect() +} + +/// Marshall the shard network image into a [ByteVector]. 
+pub fn shard_image_to_bytes(input: HashMap>) -> Vec { + const SHARD_PEER_SEPARATOR: &[u8] = b"&&&"; + const PEER_SEPARATOR: &[u8] = b"%%"; + const SHARD_ENTRY_SEPARATOR: &[u8] = b"@@@"; + + let mut result = Vec::new(); + + for (shard_id, peers) in input { + // Convert shard_id to bytes and append + result.extend_from_slice(shard_id.as_bytes()); + + // Add the separator for peers + result.extend_from_slice(SHARD_PEER_SEPARATOR); + + // Convert each PeerId to bytes and append, separated by PEER_SEPARATOR + for peer in peers.iter() { + result.extend_from_slice(&peer.to_bytes()); + result.extend_from_slice(PEER_SEPARATOR); + } + + // Remove the last PEER_SEPARATOR if any + if !peers.is_empty() { + result.truncate(result.len() - PEER_SEPARATOR.len()); + } + + // Add the shard entry separator + result.extend_from_slice(SHARD_ENTRY_SEPARATOR); + } + + result +} + +/// Merge the incoming shard state with the local shard state of the network. +pub fn merge_shard_states( + local_state: &mut HashMap>, + incoming_state: HashMap>, +) { + for (shard_id, incoming_peers) in incoming_state.iter() { + local_state + .entry(shard_id.to_owned()) + .and_modify(|local_peers| { + // Add only unique peers from incoming_peers to local_peers + for peer in incoming_peers { + if !local_peers.contains(peer) { + local_peers.push(peer.clone()); + } + } + }) + .or_insert(incoming_peers.to_owned()); // If the shard_id doesn't exist, insert it directly + } +} + +/// Unmarshall the bytes into the shard network image. 
+pub fn bytes_to_shard_image(input: Vec) -> HashMap> { + const SHARD_ENTRY_SEPARATOR: &[u8] = b"@@@"; + const SHARD_PEER_SEPARATOR: &[u8] = b"&&&"; + const PEER_SEPARATOR: &[u8] = b"%%"; + + let mut result = HashMap::new(); + + // Try to convert the input to a UTF-8 string, return empty HashMap if conversion fails + let input_str = match String::from_utf8(input) { + Ok(s) => s, + Err(_) => return result, + }; + + // Split the input by SHARD_ENTRY_SEPARATOR + for entry in input_str.split(std::str::from_utf8(SHARD_ENTRY_SEPARATOR).unwrap_or("@@@")) { + // Split the entry by SHARD_PEER_SEPARATOR + let parts: Vec<&str> = entry + .split(std::str::from_utf8(SHARD_PEER_SEPARATOR).unwrap_or("&&&")) + .collect(); + + // Ensure we have at least two parts (shard_id and peers) + if parts.len() >= 2 { + let shard_id = parts[0].to_string(); + + // Split peers and convert to PeerIds + let peers: Vec = parts[1] + .split(std::str::from_utf8(PEER_SEPARATOR).unwrap_or("%%")) + .filter_map(|peer_str| PeerId::from_bytes(peer_str.as_bytes()).ok()) + .collect(); + + // Only insert if peers are not empty + if !peers.is_empty() { + result.insert(shard_id, peers); + } + } + } + + result } #[cfg(test)] @@ -199,7 +314,7 @@ mod tests { const CUSTOM_TCP_PORT: Port = 49666; const CUSTOM_UDP_PORT: Port = 49852; - // Helper to create an INI file without a static keypair and a valid range for ports. + // Helper to create an INI file without a keypair and a valid range for ports. fn create_test_ini_file_without_keypair(file_path: &str) { let mut config = Ini::new(); config