Skip to content
This repository was archived by the owner on Feb 27, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
3944476
require strict leaf chain
ss-es Jan 23, 2025
05c61a8
[test] use hotstuff 2 always
ss-es Jan 23, 2025
342e8e1
add undecided leaves on restart
ss-es Jan 24, 2025
d73a13a
fix?
ss-es Jan 24, 2025
2a5bf48
cleanup
ss-es Jan 24, 2025
b142304
add epoch_height
ss-es Jan 24, 2025
c242dbe
fix examples
ss-es Jan 24, 2025
afb5d38
remove overall safety task
ss-es Jan 22, 2025
54e0521
add checks
ss-es Jan 22, 2025
eca45d9
fix tests_1
ss-es Jan 22, 2025
afe359f
fix tests_2
ss-es Jan 24, 2025
5df0746
fix tests_3
ss-es Jan 24, 2025
54bde28
fix tests_4
ss-es Jan 24, 2025
b79b4a7
fix tests_5
ss-es Jan 24, 2025
9784c06
fix tests_6
ss-es Jan 24, 2025
54e5f15
remove num_failed_views
ss-es Jan 24, 2025
451f535
try disabling hotstuff 2
ss-es Jan 25, 2025
f297077
fix
ss-es Jan 25, 2025
7b26c1b
re-enable hotstuff 2
ss-es Jan 25, 2025
9937a67
add timeout
ss-es Jan 27, 2025
3cf14c1
fix
ss-es Jan 27, 2025
e5148a2
fix tests
ss-es Jan 31, 2025
a5db056
Merge branch 'main' into ss/rewrite-overall-safety-task
ss-es Jan 31, 2025
3e9d794
fix flakiness
ss-es Jan 31, 2025
4a8af68
fix flakiness
ss-es Jan 31, 2025
65ed0e4
Merge branch 'main' into ss/rewrite-overall-safety-task
ss-es Feb 3, 2025
c7f3f71
fix logging
ss-es Feb 3, 2025
48fecc2
fix new tests
ss-es Feb 3, 2025
540d1cd
clippy
ss-es Feb 3, 2025
9eeff4c
Merge branch 'main' into ss/rewrite-overall-safety-task
ss-es Feb 4, 2025
b2bd94c
address comments
ss-es Feb 6, 2025
0617280
Merge branch 'main' into ss/rewrite-overall-safety-task
ss-es Feb 6, 2025
ab6ea51
clippy
ss-es Feb 7, 2025
4ae2742
make sure to check num_successful_views after timeout
ss-es Feb 7, 2025
80dc3fa
fix
ss-es Feb 7, 2025
0e2262c
fixes
ss-es Feb 7, 2025
1c854b7
fix staggered restart test
ss-es Feb 7, 2025
6e8a8c9
fix tests_3
ss-es Feb 7, 2025
a9228e5
fix tests_4
ss-es Feb 7, 2025
ced9784
fix
ss-es Feb 7, 2025
f0e94e3
fix
ss-es Feb 7, 2025
ecac165
fixes
ss-es Feb 10, 2025
98ea5e6
more fixes
ss-es Feb 10, 2025
82cd053
fix
ss-es Feb 10, 2025
56f16c2
Merge branch 'main' into ss/rewrite-overall-safety-task
ss-es Feb 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 2 additions & 8 deletions crates/hotshot/src/tasks/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,14 +159,8 @@ pub fn add_network_message_task<
// Wait for a message from the network
message = network.recv_message().fuse() => {
// Make sure the message did not fail
let message = match message {
Ok(message) => {
message
}
Err(e) => {
tracing::trace!("Failed to receive message: {:?}", e);
continue;
}
let Ok(message) = message else {
continue;
};

// Deserialize the message
Expand Down
2 changes: 1 addition & 1 deletion crates/hotshot/src/traits/networking/memory_network.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ impl<K: SignatureKey + 'static> ConnectedNetwork<K> for MemoryNetwork<K> {
.iter()
{
if !recipients.contains(&node.0) {
tracing::error!("Skipping node because not in recipient list: {:?}", &node.0);
tracing::trace!("Skipping node because not in recipient list: {:?}", &node.0);
continue;
}
// TODO delay/drop etc here
Expand Down
1 change: 1 addition & 0 deletions crates/testing/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ tide-disco = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
url = { workspace = true }
utils = { path = "../utils" }
vbs = { workspace = true }
vec1 = { workspace = true }
workspace-hack = { version = "0.1", path = "../workspace-hack" }
166 changes: 152 additions & 14 deletions crates/testing/src/consistency_task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#![allow(clippy::unwrap_or_default)]
use std::{collections::BTreeMap, marker::PhantomData};

use anyhow::{bail, ensure, Context, Result};
use async_broadcast::Sender;
use async_trait::async_trait;
use committable::Committable;
use hotshot_example_types::block_types::TestBlockHeader;
Expand All @@ -17,11 +17,13 @@ use hotshot_types::{
message::UpgradeLock,
traits::node_implementation::{ConsensusTime, NodeType, Versions},
};
use tokio::task::JoinHandle;
use utils::anytrace::*;

use crate::{
overall_safety_task::OverallSafetyPropertiesDescription,
test_builder::TransactionValidator,
test_task::{TestResult, TestTaskState},
test_task::{spawn_timeout_task, TestEvent, TestResult, TestTaskState},
};

/// Map from views to leaves for a single node, allowing multiple leaves for each view (because the node may a priori send us multiple leaves for a given view).
Expand Down Expand Up @@ -100,7 +102,12 @@ async fn validate_node_map<TYPES: NodeType, V: Versions>(
child
.extends_upgrade(parent, &upgrade_lock.decided_upgrade_certificate)
.await
.context("Leaf {child} does not extend its parent {parent}")?;
.context(|e| {
error!(
"Leaf {child:?} does not extend its parent {parent:?}: {}",
e
)
})?;

// We want to make sure the commitment matches,
// but allow for the possibility that we may have skipped views in between.
Expand Down Expand Up @@ -138,7 +145,7 @@ fn sanitize_network_map<TYPES: NodeType>(
result.insert(
*node,
sanitize_node_map(node_map)
.context(format!("Node {node} produced inconsistent leaves."))?,
.context(|e| error!("Node {node} produced inconsistent leaves: {}", e))?,
);
}

Expand All @@ -159,7 +166,7 @@ async fn invert_network_map<TYPES: NodeType, V: Versions>(
for (node_id, node_map) in network_map.iter() {
validate_node_map::<TYPES, V>(node_map)
.await
.context(format!("Node {node_id} has an invalid leaf history"))?;
.context(|e| error!("Node {node_id} has an invalid leaf history: {}", e))?;

// validate each node's leaf map
for (view, leaf) in node_map.iter() {
Expand All @@ -186,9 +193,13 @@ fn sanitize_view_map<TYPES: NodeType>(

ensure!(
node_leaves.len() <= 1,
leaf_map.iter().fold(
format!("The network does not agree on view {view:?}."),
|acc, (node, leaf)| { format!("{acc}\n\nNode {node} sent us leaf:\n\n{leaf:?}") }
error!(
"The network does not agree on the following views: {}",
leaf_map
.iter()
.fold(format!("\n\nView {view:?}:"), |acc, (node, leaf)| {
format!("{acc}\n\nNode {node} sent us leaf:\n\n{leaf:?}")
})
)
);

Expand All @@ -207,18 +218,29 @@ fn sanitize_view_map<TYPES: NodeType>(
Ok(result)
}

enum TestProgress {
Incomplete,
Finished,
}

/// Data availability task state
pub struct ConsistencyTask<TYPES: NodeType, V: Versions> {
/// A map from node ids to (leaves keyed on view number)
pub consensus_leaves: NetworkMap<TYPES>,
/// safety task requirements
pub safety_properties: OverallSafetyPropertiesDescription<TYPES>,
pub safety_properties: OverallSafetyPropertiesDescription,
/// whether we should have seen an upgrade certificate or not
pub ensure_upgrade: bool,
/// a list of errors accumulated by the task
pub errors: Vec<Error>,
/// channel used to shutdown the test
pub test_sender: Sender<TestEvent>,
/// phantom marker
pub _pd: PhantomData<V>,
/// function used to validate the number of transactions committed in each block
pub validate_transactions: TransactionValidator,
/// running timeout task
pub timeout_task: JoinHandle<()>,
}

impl<TYPES: NodeType<BlockHeader = TestBlockHeader>, V: Versions> ConsistencyTask<TYPES, V> {
Expand All @@ -228,6 +250,15 @@ impl<TYPES: NodeType<BlockHeader = TestBlockHeader>, V: Versions> ConsistencyTas
let inverted_map = invert_network_map::<TYPES, V>(&sanitized_network_map).await?;

let sanitized_view_map = sanitize_view_map(&inverted_map)?;
let num_successful_views = sanitized_view_map.iter().len();

// check that we've succeeded in enough views
ensure!(
num_successful_views >= self.safety_properties.num_successful_views,
"Not enough successful views: expected {:?} but got {:?}",
self.safety_properties.num_successful_views,
num_successful_views,
);

let expected_upgrade = self.ensure_upgrade;
let actual_upgrade = sanitized_view_map.iter().fold(false, |acc, (_view, leaf)| {
Expand All @@ -253,6 +284,90 @@ impl<TYPES: NodeType<BlockHeader = TestBlockHeader>, V: Versions> ConsistencyTas

Ok(())
}

async fn partial_validate(&self) -> Result<TestProgress> {
self.check_view_success().await?;
self.check_view_failure().await?;

self.check_total_successes().await
}

fn add_error(&mut self, error: Error) {
self.errors.push(error);
}

async fn handle_result(&mut self, result: Result<TestProgress>) {
match result {
Ok(TestProgress::Finished) => {
let _ = self.test_sender.broadcast(TestEvent::Shutdown).await;
}
Err(e) => {
self.add_error(e);
let _ = self.test_sender.broadcast(TestEvent::Shutdown).await;
}
Ok(TestProgress::Incomplete) => {}
}
}

async fn check_total_successes(&self) -> Result<TestProgress> {
let sanitized_network_map = sanitize_network_map(&self.consensus_leaves)?;

let inverted_map = invert_network_map::<TYPES, V>(&sanitized_network_map).await?;

if inverted_map.len() >= self.safety_properties.num_successful_views {
Ok(TestProgress::Finished)
} else {
Ok(TestProgress::Incomplete)
}
}
pub async fn check_view_success(&self) -> Result<()> {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This checking that we didn't decide a view that we think should fail? can't we just check this super simply by looking at the leaf.view rather than getting it back out of our map

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes you're right! this was just copy-paste

will fix

for (node_id, node_map) in self.consensus_leaves.iter() {
for (view, leaf) in node_map {
ensure!(
!self
.safety_properties
.expected_view_failures
.contains(view),
"Expected a view failure, but got a decided leaf for view {:?} from node {:?}.\n\nLeaf:\n\n{:?}",
view,
node_id,
leaf
);
}
}

Ok(())
}

pub async fn check_view_failure(&self) -> Result<()> {
let sanitized_network_map = sanitize_network_map(&self.consensus_leaves)?;

let mut inverted_map = invert_network_map::<TYPES, V>(&sanitized_network_map).await?;

let (current_view, _) = inverted_map
.pop_last()
.context(error!("Leaf map is empty, which should be impossible"))?;
let Some((last_view, _)) = inverted_map.pop_last() else {
// the view cannot fail if there wasn't a prior view in the map.
return Ok(());
};

// filter out views we expected to (possibly) fail
let unexpected_failed_views: Vec<_> = (*(last_view + 1)..*current_view)
.filter(|view| {
!self.safety_properties.expected_view_failures.contains(view)
&& !self.safety_properties.possible_view_failures.contains(view)
})
.collect();

ensure!(
unexpected_failed_views.is_empty(),
"Unexpected failed views: {:?}",
unexpected_failed_views
);

Ok(())
}
}

#[async_trait]
Expand All @@ -268,23 +383,46 @@ impl<TYPES: NodeType<BlockHeader = TestBlockHeader>, V: Versions> TestTaskState
..
} = message
{
let map = &mut self.consensus_leaves.entry(id).or_insert(BTreeMap::new());
{
let mut timeout_task = spawn_timeout_task(
self.test_sender.clone(),
self.safety_properties.decide_timeout,
);

std::mem::swap(&mut self.timeout_task, &mut timeout_task);

timeout_task.abort();
}

for leaf_info in leaf_chain.iter().rev() {
let map = &mut self.consensus_leaves.entry(id).or_insert(BTreeMap::new());

leaf_chain.iter().for_each(|leaf_info| {
map.entry(leaf_info.leaf.view_number())
.and_modify(|vec| vec.push(leaf_info.leaf.clone()))
.or_insert(vec![leaf_info.leaf.clone()]);
});

let result = self.partial_validate().await;

self.handle_result(result).await;
}
}

Ok(())
}

async fn check(&self) -> TestResult {
self.timeout_task.abort();

let mut errors: Vec<_> = self.errors.iter().map(|e| e.to_string()).collect();

if let Err(e) = self.validate().await {
return TestResult::Fail(Box::new(e));
errors.push(e.to_string());
}

TestResult::Pass
if !errors.is_empty() {
TestResult::Fail(Box::new(errors))
} else {
TestResult::Pass
}
}
}
Loading