Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: create network simulator #108

Open
wants to merge 29 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
1b70ae4
create nix scripts, create clap CLI
0xmovses Feb 12, 2024
1024138
move resuable utils into simulator
0xmovses Feb 12, 2024
a580994
fix bug in build.debug for protoc v comparison
0xmovses Feb 12, 2024
a473901
debug
0xmovses Feb 12, 2024
61695f5
update nix deps for macOS target
Feb 13, 2024
1d3d0b1
add nix.flake
Feb 13, 2024
51bf9f9
mv flake.nix
0xmovses Feb 13, 2024
4fd4d35
resolve mac OS
0xmovses Feb 13, 2024
25eb72c
Merge pull request #112 from movemntdev/0xmovses/flake
0xmovses Feb 13, 2024
5fc8fbf
add libcxx and link
0xmovses Feb 14, 2024
38b87be
aarch64
0xmovses Feb 14, 2024
48a5be2
remove pkgs already used by rocksdb
0xmovses Feb 14, 2024
dc9e67d
new flake
0xmovses Feb 14, 2024
9b0a233
update network init, refactors
0xmovses Feb 16, 2024
ad69184
get ava go paths, improve error handling, add log debugs
0xmovses Feb 17, 2024
196fb54
update init logic
0xmovses Feb 17, 2024
59ebfa9
solve avalanchego bin exec bug
0xmovses Feb 18, 2024
0bce357
revert config.toml
0xmovses Feb 18, 2024
6ce4cc2
implement Simulator, refactor, dispatch
0xmovses Feb 19, 2024
aee52e7
updates
0xmovses Feb 19, 2024
31a5d54
cargo.lock
0xmovses Feb 19, 2024
e681064
add validators
0xmovses Feb 20, 2024
cbc005b
add validators
0xmovses Feb 20, 2024
d8f6e0c
Merge pull request #113 from movemntdev/0xmovses/system-deps
0xmovses Feb 20, 2024
90b881a
fix add validator
0xmovses Feb 20, 2024
21c4917
cleanup
0xmovses Feb 20, 2024
1f3daf6
remove unused
0xmovses Feb 20, 2024
dde6bef
remove unused
0xmovses Feb 20, 2024
9cd0cdd
improve get subnet_id & init_log
0xmovses Feb 21, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion m1/.cargo/config.toml
Original file line number Diff line number Diff line change
@@ -30,4 +30,4 @@ rustflags = [
"force-unwind-tables=yes",
"-C",
"link-arg=/STACK:8000000" # Set stack to 8 MB
]
]
125 changes: 100 additions & 25 deletions m1/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion m1/Cargo.toml
Original file line number Diff line number Diff line change
@@ -3,7 +3,8 @@ resolver = "2"
members = [
"subnet",
"tests/e2e",
"e2e-benchmark"
"e2e-benchmark",
"simulator",
]

[workspace.package]
@@ -35,6 +36,7 @@ codespan-reporting = "0.11.1"
criterion = "0.3.5"
criterion-cpu-time = "0.1.0"
dirs = "4.0.0"
env_logger = "0.10.1"
hex = "0.4.3"
hkdf = "0.10.0"
hostname = "0.3.1"
2 changes: 2 additions & 0 deletions m1/justfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Build the subnet debug binary, then the simulator binary.
build:
    ./scripts/build.debug.sh && cargo build -p simulator --bin simulator
5 changes: 5 additions & 0 deletions m1/rustfmt.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
comment_width = 100
format_code_in_doc_comments = true
imports_granularity = "Crate"
imports_layout = "Vertical"
wrap_comments = true
9 changes: 6 additions & 3 deletions m1/scripts/build.debug.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/usr/bin/env bash
#!/usr/bin/env bash
set -xue

if ! [[ "$0" =~ scripts/build.debug.sh ]]; then
@@ -7,8 +8,10 @@ if ! [[ "$0" =~ scripts/build.debug.sh ]]; then
fi

PROTOC_VERSION=$(protoc --version | cut -f2 -d' ')
MIN_VERSION="3.15"

# Require protoc >= MIN_VERSION.
# An empty version (protoc missing or unparsable output) is rejected explicitly.
# Otherwise the smaller of the two versions under `sort -V` must be MIN_VERSION itself;
# comparing whole strings avoids substring matches such as "3.1" inside "3.15".
LOWEST_VERSION=$(printf '%s\n%s\n' "${MIN_VERSION}" "${PROTOC_VERSION}" | sort -V | head -n 1)
if [[ -z "${PROTOC_VERSION}" ]] || [[ "${LOWEST_VERSION}" != "${MIN_VERSION}" ]]; then
  echo "protoc must be installed and the version must be greater than 3.15"
  exit 255
fi

@@ -18,4 +21,4 @@ cargo build -p subnet --bin subnet
./target/debug/subnet --help

./target/debug/subnet genesis "hello world"
./target/debug/subnet vm-id subnet
./target/debug/subnet vm-id subnet
25 changes: 25 additions & 0 deletions m1/scripts/subnet-cli-setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash -e
#
# Downloads the pinned subnet-cli release for the local Go OS/arch, smoke-tests the
# binary and copies it to $HOME/bin/subnet-cli.

# Keep fail-fast behaviour even when the script is run as `bash subnet-cli-setup.sh`,
# in which case the `-e` on the shebang line is not applied.
set -e

# Install subnet-cli
VERSION=0.0.4 # Populate latest here

GOARCH=$(go env GOARCH)
GOOS=$(go env GOOS)
DOWNLOAD_PATH=/tmp/subnet-cli.tar.gz
# Linux archive by default; switched to the darwin archive below when GOOS is darwin.
DOWNLOAD_URL=https://github.com/ava-labs/subnet-cli/releases/download/v${VERSION}/subnet-cli_${VERSION}_linux_${GOARCH}.tar.gz
if [[ ${GOOS} == "darwin" ]]; then
  DOWNLOAD_URL=https://github.com/ava-labs/subnet-cli/releases/download/v${VERSION}/subnet-cli_${VERSION}_darwin_${GOARCH}.tar.gz
fi

# Remove leftovers from a previous run.
rm -f "${DOWNLOAD_PATH}"
rm -f /tmp/subnet-cli

echo "downloading subnet-cli ${VERSION} at ${DOWNLOAD_URL}"
# -f makes curl fail on an HTTP error instead of saving the error page as the tarball.
curl -fL "${DOWNLOAD_URL}" -o "${DOWNLOAD_PATH}"

echo "extracting downloaded subnet-cli"
tar xzvf "${DOWNLOAD_PATH}" -C /tmp

# Smoke test: the extracted binary must at least print its help.
/tmp/subnet-cli -h

# $HOME/bin is not guaranteed to exist; create it so the copy cannot fail on that.
mkdir -p "${HOME}/bin"
cp /tmp/subnet-cli "${HOME}/bin/subnet-cli"
23 changes: 23 additions & 0 deletions m1/simulator/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[package]
name = "simulator"
version = "0.1.0"
edition = "2021"

[dependencies]
avalanche-installer = "0.0.77"
avalanche-network-runner-sdk = { git = "https://github.com/0xmovses/avalanche-network-runner-sdk-rs", branch = "main" }
log = "0.4.19"
random-manager = "0.0.5"
serde = { workspace = true }
serde_json = "1.0.108" # https://github.com/serde-rs/json/releases
avalanche-types = { workspace = true } # https://crates.io/crates/avalanche-types
aptos-sdk = {workspace = true }
anyhow = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }
tokio = { workspace = true }
tonic = "0.9.2"
once_cell = { workspace = true }
rand = { workspace = true }
reqwest = "0.11.24"
clap = { workspace = true }
150 changes: 150 additions & 0 deletions m1/simulator/src/commands.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
use clap::{Parser, Subcommand};

// Top-level command-line definition for the simulator binary: one subcommand plus a
// verbosity flag. Reviewer notes use `//` on purpose, because clap's derive macros turn
// `///` doc comments into help text.
#[derive(Debug, Parser, Clone)]
#[clap(name = "simulator", about = "M1 network simulator", version = "0.1")]
pub struct Cli {
    /// The command to run
    #[clap(subcommand)]
    pub command: SubCommands,
    /// Verbose output
    #[clap(short, long, help = "Verbose output.")]
    pub verbose: bool,
}

/// Start the simulator
// Arguments of the `start` subcommand. Every option is a `--long` flag; only `verbose`
// also has a short form.
#[derive(Clone, Debug, Parser)]
pub struct StartCommand {
    /// The number of validators for the network
    #[clap(long, default_value = "5")]
    #[clap(help = "The number of validators for the network.")]
    pub nodes: u64,

    /// Sets if the validators join the network at once, or in a staggered way
    #[clap(long, default_value = "false")]
    #[clap(help = "Sets if the validators join the network at once, or in a staggered way.")]
    pub staggered: bool,

    /// Verbose output
    #[clap(short, long)]
    #[clap(help = "Verbose output.")]
    pub verbose: bool,

    /// The GRPC endpoint of the network runner to connect to
    // Optional: `None` when the flag is not given on the command line.
    #[clap(long)]
    #[clap(help = "The GRPC endpoint of the network runner to connect to.")]
    pub grpc_endpoint: Option<String>,
}

/// Partition the network
// Arguments of the `partition` subcommand.
#[derive(Clone, Debug, Parser)]
pub struct PartitionCommand {
    /// The percentage of validators that will be partitioned
    // NOTE(review): the field is a plain `u8`, so values above 100 are accepted by the
    // parser; nothing in this struct restricts the range.
    #[clap(long, default_value = "5")]
    #[clap(help = "The percentage of validators that will be in a partitioned state")]
    pub amount: u8,

    /// Sets if the validators become partitioned at once or in a staggered way
    #[clap(long, default_value = "false")]
    #[clap(help = "Sets if the validators become partitioned at once or in a staggered way.")]
    pub staggered: bool,

    /// Verbose output
    #[clap(short, long)]
    #[clap(help = "Verbose output.")]
    pub verbose: bool,
}

// Arguments of the `reconnect` subcommand. `nodes` carries no `long`/`short` attribute,
// so clap treats it as positional values.
#[derive(Clone, Debug, Parser)]
pub struct ReconnectCommand {
    /// The nodes to reconnect by `NodeId`
    pub nodes: Vec<u64>,

    /// Sets if the validators rejoin the network together or in a staggered way
    #[clap(long, default_value = "false")]
    pub staggered: bool,

    /// Verbose output
    #[clap(short, long)]
    #[clap(help = "Verbose output.")]
    pub verbose: bool,
}

/// Add a node to the network
// Arguments of the `add-node` subcommand.
#[derive(Clone, Debug, Parser)]
pub struct AddNodeCommand {
    /// Verbose output
    #[clap(short, long)]
    #[clap(help = "Verbose output.")]
    pub verbose: bool,

    /// The name of the node to add
    // Optional: `None` when `--name` is not given.
    #[clap(long)]
    #[clap(help = "The name of the node to add.")]
    pub name: Option<String>,
}

// Arguments of the `remove-node` subcommand; `--name` is required because the field is
// a plain `String` without a default.
#[derive(Clone, Debug, Parser)]
pub struct RemoveNodeCommand {
    /// Verbose output
    #[clap(short, long)]
    #[clap(help = "Verbose output.")]
    pub verbose: bool,

    /// The name of the node to remove
    #[clap(long)]
    #[clap(help = "The name of the node to remove.")]
    pub name: String,
}

// Arguments of the `add-validator` subcommand; `--name` is required because the field is
// a plain `String` without a default.
#[derive(Clone, Debug, Parser)]
pub struct AddValidatorCommand {
    /// Verbose output
    #[clap(short, long)]
    #[clap(help = "Verbose output.")]
    pub verbose: bool,

    /// The name of the validator to add
    #[clap(long)]
    #[clap(help = "The name of the validator to add.")]
    pub name: String,
}

// Arguments of the `remove-validator` subcommand; `--name` is required because the field
// is a plain `String` without a default.
#[derive(Clone, Debug, Parser)]
pub struct RemoveValidatorCommand {
    /// Verbose output
    #[clap(short, long)]
    #[clap(help = "Verbose output.")]
    pub verbose: bool,

    /// The name of the validator to remove
    #[clap(long)]
    #[clap(help = "The name of the validator to remove.")]
    pub name: String,
}

// Arguments of the `health` subcommand; it only carries the verbosity flag.
#[derive(Clone, Debug, Parser)]
pub struct HealthCommand {
    /// Verbose output
    #[clap(short, long)]
    #[clap(help = "Verbose output.")]
    pub verbose: bool,
}

// All subcommands understood by the CLI. Each variant wraps the argument struct of the
// same name; the `///` line on a variant is what clap shows as that subcommand's summary.
#[derive(Clone, Debug, Subcommand)]
pub enum SubCommands {
    /// Starts the network with a number of validators
    Start(StartCommand),

    /// Adds a node to the network
    AddNode(AddNodeCommand),

    /// Removes a node from the network
    RemoveNode(RemoveNodeCommand),

    /// Adds a validator to the network
    AddValidator(AddValidatorCommand),

    /// Removes a validator from the network
    RemoveValidator(RemoveValidatorCommand),

    /// Simulates a network partition.
    Partition(PartitionCommand),

    /// Reconnects the validators after they have become partitioned
    Reconnect(ReconnectCommand),

    /// Output the overall network and consensus health
    Health(HealthCommand),
}
465 changes: 465 additions & 0 deletions m1/simulator/src/lib.rs

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions m1/simulator/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
use clap::Parser;
use simulator::{commands::Cli, Simulator};

#[tokio::main]
async fn main() -> Result<(), anyhow::Error> {
let cli = Cli::parse();
let mut simulator = Simulator::new(cli.command).await?;
simulator.exec(cli.verbose).await?;
Ok(())
}
4 changes: 3 additions & 1 deletion m1/tests/e2e/Cargo.toml
Original file line number Diff line number Diff line change
@@ -9,10 +9,12 @@ license = "BSD-3-Clause"
homepage = "https://avax.network"

[dependencies]
tokio = { workspace = true }
simulator = { path = "../../simulator" }

[dev-dependencies]
avalanche-installer = "0.0.77"
avalanche-network-runner-sdk = "0.3.3" # https://crates.io/crates/avalanche-network-runner-sdk
avalanche-network-runner-sdk = { git = "https://github.com/0xmovses/avalanche-network-runner-sdk-rs", branch = "main" }
avalanche-types = { workspace = true } # https://crates.io/crates/avalanche-types
env_logger = "0.10.1"
log = "0.4.19"
27 changes: 0 additions & 27 deletions m1/tests/e2e/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,31 +1,4 @@
#[cfg(test)]
mod tests;

/// Reads `NETWORK_RUNNER_GRPC_ENDPOINT` from the environment.
///
/// Returns `(value, true)` when the variable can be read, and `(String::new(), false)`
/// when `std::env::var` reports an error.
#[must_use]
pub fn get_network_runner_grpc_endpoint() -> (String, bool) {
    std::env::var("NETWORK_RUNNER_GRPC_ENDPOINT")
        .map_or_else(|_| (String::new(), false), |endpoint| (endpoint, true))
}

/// Reports whether `NETWORK_RUNNER_ENABLE_SHUTDOWN` can be read from the environment.
///
/// Only readability matters; the variable's value is not inspected.
#[must_use]
pub fn get_network_runner_enable_shutdown() -> bool {
    std::env::var("NETWORK_RUNNER_ENABLE_SHUTDOWN").is_ok()
}

/// Reads `AVALANCHEGO_PATH` from the environment.
///
/// Returns `(value, true)` when the variable can be read, and `(String::new(), false)`
/// when `std::env::var` reports an error.
#[must_use]
pub fn get_avalanchego_path() -> (String, bool) {
    if let Ok(exec_path) = std::env::var("AVALANCHEGO_PATH") {
        (exec_path, true)
    } else {
        (String::new(), false)
    }
}

/// Reads `VM_PLUGIN_PATH` from the environment.
///
/// Returns `(value, true)` when the variable can be read, and `(String::new(), false)`
/// when `std::env::var` reports an error.
#[must_use]
pub fn get_vm_plugin_path() -> (String, bool) {
    let plugin_path = std::env::var("VM_PLUGIN_PATH").ok();
    let is_set = plugin_path.is_some();
    (plugin_path.unwrap_or_default(), is_set)
}
250 changes: 13 additions & 237 deletions m1/tests/e2e/src/tests/mod.rs
Original file line number Diff line number Diff line change
@@ -1,245 +1,21 @@
use core::time;
use std::{
io,
fs::{self, File},
path::Path,
str::FromStr,
thread,
time::{Duration, Instant}, io::Write,
use simulator::{
commands::{StartCommand, SubCommands},
Simulator,
};

use avalanche_network_runner_sdk::{BlockchainSpec, Client, GlobalConfig, StartRequest};
use avalanche_types::{ids, jsonrpc::client::info as avalanche_sdk_info, subnet};

const AVALANCHEGO_VERSION: &str = "v1.10.9";

// todo: extracted from genesis method
// todo: really we should use a genesis once more
/// Writes `byte_string` to `file_path`, creating the parent directories first.
///
/// # Errors
///
/// Returns `io::ErrorKind::InvalidInput` when `file_path` has no parent component
/// (for example an empty path or a bare root), and forwards any error from creating
/// the directories, creating the file or writing the bytes.
pub fn sync_genesis(byte_string: &str, file_path: &str) -> io::Result<()> {
    log::info!("syncing genesis to '{}'", file_path);

    // Report an unusable path through the `io::Result` instead of panicking.
    let parent_dir = Path::new(file_path).parent().ok_or_else(|| {
        io::Error::new(
            io::ErrorKind::InvalidInput,
            format!("genesis path '{file_path}' has no parent directory"),
        )
    })?;
    fs::create_dir_all(parent_dir)?;

    let mut f = File::create(file_path)?;
    f.write_all(byte_string.as_bytes())?;

    Ok(())
}

#[tokio::test]
async fn e2e() {
let _ = env_logger::builder()
.filter_level(log::LevelFilter::Info)
.is_test(true)
.try_init();

let (ep, is_set) = crate::get_network_runner_grpc_endpoint();
assert!(is_set);

let cli = Client::new(&ep).await;

log::info!("ping...");
let resp = cli.ping().await.expect("failed ping");
log::info!("network-runner is running (ping response {:?})", resp);

let (vm_plugin_path, exists) = crate::get_vm_plugin_path();
log::info!("Vm Plugin path: {vm_plugin_path}");
assert!(exists);
assert!(Path::new(&vm_plugin_path).exists());

let vm_id = Path::new(&vm_plugin_path)
.file_stem()
.unwrap()
.to_str()
.unwrap()
.to_string();
// ! for now, we hardcode the id to be subnet for orchestration
let vm_id = subnet::vm_name_to_id("subnet").unwrap();

let (mut avalanchego_exec_path, _) = crate::get_avalanchego_path();
let plugins_dir = if !avalanchego_exec_path.is_empty() {
let parent_dir = Path::new(&avalanchego_exec_path)
.parent()
.expect("unexpected None parent");
parent_dir
.join("plugins")
.as_os_str()
.to_str()
.unwrap()
.to_string()
} else {
let exec_path = avalanche_installer::avalanchego::github::download(
None,
None,
Some(AVALANCHEGO_VERSION.to_string()),
)
.await
.unwrap();
avalanchego_exec_path = exec_path;
avalanche_installer::avalanchego::get_plugin_dir(&avalanchego_exec_path)
let cmd = StartCommand {
nodes: 5,
staggered: false,
verbose: false,
grpc_endpoint: None,
};

log::info!(
"copying vm plugin {} to {}/{}",
vm_plugin_path,
plugins_dir,
vm_id
);

fs::create_dir(&plugins_dir).unwrap();
fs::copy(
&vm_plugin_path,
Path::new(&plugins_dir).join(vm_id.to_string()),
)
.unwrap();

// write some random genesis file
let genesis = random_manager::secure_string(10);

let genesis_file_path = random_manager::tmp_path(10, None).unwrap();
sync_genesis(genesis.as_ref(), &genesis_file_path).unwrap();

log::info!(
"starting {} with avalanchego {}, genesis file path {}",
vm_id,
&avalanchego_exec_path,
genesis_file_path,
);
let resp = cli
.start(StartRequest {
exec_path: avalanchego_exec_path,
num_nodes: Some(5),
plugin_dir: plugins_dir,
global_node_config: Some(
serde_json::to_string(&GlobalConfig {
log_level: String::from("info"),
})
.unwrap(),
),
blockchain_specs: vec![BlockchainSpec {
vm_name: String::from("subnet"),
genesis: genesis_file_path.to_string(),
// blockchain_alias : String::from("subnet"), // todo: this doesn't always work oddly enough, need to debug
..Default::default()
}],
..Default::default()
})
let mut simulator = Simulator::new(SubCommands::Start(cmd))
.await
.expect("failed start");
log::info!(
"started avalanchego cluster with network-runner: {:?}",
resp
);

// enough time for network-runner to get ready
thread::sleep(Duration::from_secs(20));

log::info!("checking cluster healthiness...");
let mut ready = false;

let timeout = Duration::from_secs(300);
let interval = Duration::from_secs(15);
let start = Instant::now();
let mut cnt: u128 = 0;
loop {
let elapsed = start.elapsed();
if elapsed.gt(&timeout) {
break;
}

let itv = {
if cnt == 0 {
// first poll with no wait
Duration::from_secs(1)
} else {
interval
}
};
thread::sleep(itv);

ready = {
match cli.health().await {
Ok(_) => {
log::info!("healthy now!");
true
}
Err(e) => {
log::warn!("not healthy yet {}", e);
false
}
}
};
if ready {
break;
}

cnt += 1;
}
assert!(ready);

log::info!("checking status...");
let mut status = cli.status().await.expect("failed status");
loop {
let elapsed = start.elapsed();
if elapsed.gt(&timeout) {
break;
}

if let Some(ci) = &status.cluster_info {
if !ci.custom_chains.is_empty() {
break;
}
}

log::info!("retrying checking status...");
thread::sleep(interval);
status = cli.status().await.expect("failed status");
}

assert!(status.cluster_info.is_some());
let cluster_info = status.cluster_info.unwrap();
let mut rpc_eps: Vec<String> = Vec::new();
for (node_name, iv) in cluster_info.node_infos.into_iter() {
log::info!("{}: {}", node_name, iv.uri);
rpc_eps.push(iv.uri.clone());
}
let mut blockchain_id = ids::Id::empty();
for (k, v) in cluster_info.custom_chains.iter() {
log::info!("custom chain info: {}={:?}", k, v);
if v.chain_name == "subnet" {
blockchain_id = ids::Id::from_str(&v.chain_id).unwrap();
break;
}
}
log::info!("avalanchego RPC endpoints: {:?}", rpc_eps);

let resp = avalanche_sdk_info::get_network_id(&rpc_eps[0])
.expect("Failed to create simulator");
simulator
.exec(cmd.verbose)
.await
.unwrap();
let network_id = resp.result.unwrap().network_id;
log::info!("network Id: {}", network_id);

// keep alive by sleeping for duration provided by SUBNET_TIMEOUT environment variable
// use sensible default

let val = std::env::var("SUBNET_TIMEOUT")
.unwrap_or_else(|_| "0".to_string())
.parse::<i64>()
.unwrap();

log::info!("sleeping for {} seconds", timeout.as_secs());
if val < 0 {
// run forever
loop {
thread::sleep(Duration::from_secs(1000));
}
} else {
let timeout = Duration::from_secs(val as u64);
thread::sleep(timeout);
}

.expect("Failed to execute simulator");
}