From 1ae820d00e5062cf2a709f36d9f913c7cb2ef933 Mon Sep 17 00:00:00 2001 From: Floris Bruynooghe Date: Thu, 9 Jan 2025 10:34:55 +0100 Subject: [PATCH] tests(iroh): Add some context to test errors (#3066) ## Description The flaky tests are being super weird and making no sense. This is clutching at straws for any hint. ## Breaking Changes ## Notes & open questions /cc @ramfox Two slightly odd things I spotted while looking at this: - The use of 240.0.0.1 as "bad" IP address returned in discovery. Maybe one from the test nets in RFC 5737 would be better? - The endpoints accept a connection and immediately drop it. This could potentially send the CONNECTION_CLOSE before the ACK from the connection resulting in the connect call returning an error (BUT THAT WOULD BE A NORMAL READABLE TEST FAILURE). This probably doesn't happen because it's all local and very fast and Quinn has already ACKed the connection before the code manages to drop it. Still, essentially racy code. Fine, I added a fix for the 2nd. But it's not the issue. ## Change checklist - [x] Self-review. - [x] Documentation updates following the [style guide](https://rust-lang.github.io/rfcs/1574-more-api-documentation-conventions.html#appendix-a-full-conventions-text), if relevant. - [x] Tests if relevant. - [x] All breaking changes documented. --- iroh/src/discovery.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/iroh/src/discovery.rs b/iroh/src/discovery.rs index c23789b578..e6ecc4493d 100644 --- a/iroh/src/discovery.rs +++ b/iroh/src/discovery.rs @@ -443,6 +443,7 @@ mod tests { time::SystemTime, }; + use anyhow::Context; use iroh_base::SecretKey; use rand::Rng; use tokio_util::task::AbortOnDropHandle; @@ -604,8 +605,11 @@ mod tests { }; let ep1_addr = NodeAddr::new(ep1.node_id()); // wait for out address to be updated and thus published at least once - ep1.node_addr().await?; - let _conn = ep2.connect(ep1_addr, TEST_ALPN).await?; + ep1.node_addr().await.context("waiting for NodeAddr")?; + let _conn = ep2 + .connect(ep1_addr, TEST_ALPN) + .await + .context("connecting")?; Ok(()) } @@ -706,10 +710,13 @@ mod tests { let handle = tokio::spawn({ let ep = ep.clone(); async move { + // Keep connections alive until the task is dropped. + let mut connections = Vec::new(); // we skip accept() errors, they can be caused by retransmits while let Some(connecting) = ep.accept().await.and_then(|inc| inc.accept().ok()) { - let _conn = connecting.await?; // Just accept incoming connections, but don't do anything with them. + let conn = connecting.await?; + connections.push(conn); } anyhow::Ok(())