Skip to content
This repository has been archived by the owner on Jan 11, 2024. It is now read-only.

FM-363: State sync test #463

Merged
merged 18 commits into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions docs/ipc.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ cargo make --makefile infra/Makefile.toml \
-e BOOTSTRAPS=<BOOTSTRAP_ENDPOINT>
-e PARENT_REGISTRY=<PARENT_REGISTRY_CONTRACT_ADDR> \
-e PARENT_GATEWAY=<GATEWAY_REGISTRY_CONTRACT_ADDR> \
-e CMT_EXTERNAL_ADDR=<COMETBFT_EXTERNAL_ENDPOINT> \
-e CMT_P2P_EXTERNAL_ADDR=<COMETBFT_EXTERNAL_ENDPOINT> \
bootstrap
```
By the end of its output, this command should print the network address of your bootstrap node. You can use this endpoint to include this bootstrap node as a seed in the `seeds` configuration of CometBFT.
Expand All @@ -47,7 +47,7 @@ cargo make --makefile infra/Makefile.toml \
- `SUBNET_ID`: SubnetID the bootstrap is operating in.
- `NODE_NAME` (optional): Node name information to attach to the containers of the deployment. This will be needed to deploy more than one bootstrap in the same local environment.
- `BOOTSTRAPS`: Comma separated list of bootstraps (or seeds in CometBFT parlance) that we want this bootstrap to also be connected to.
- `CMT_EXTERNAL_ADDR`: Address to advertise to peers for them to dial. If empty, will use the same as the default listening address from CometBFT (generally `0.0.0.0:<P2P_RPC_PORT>`).
- `CMT_P2P_EXTERNAL_ADDR`: Address to advertise to peers for them to dial. If empty, will use the same as the default listening address from CometBFT (generally `0.0.0.0:<P2P_RPC_PORT>`).
- `PARENT_ENDPOINT`: Public endpoint that the validator should use to connect to the parent.
- `PARENT_REGISTRY`: Ethereum address of the IPC registry contract in the parent.
- `PARENT_GATEWAY`: Ethereum address of the IPC gateway contract in the parent.
Expand Down Expand Up @@ -76,7 +76,7 @@ cargo make --makefile infra/Makefile.toml \
-e BOOTSTRAPS=<BOOTSTRAP_ENDPOINT>
-e PARENT_REGISTRY=<PARENT_REGISTRY_CONTRACT_ADDR> \
-e PARENT_GATEWAY=<GATEWAY_REGISTRY_CONTRACT_ADDR> \
-e CMT_EXTERNAL_ADDR=<COMETBFT_EXTERNAL_ENDPOINT> \
-e CMT_P2P_EXTERNAL_ADDR=<COMETBFT_EXTERNAL_ENDPOINT> \
child-validator
```
This command will run the infrastructure for a Fendermint validator in the child subnet. It will generate the genesis of the subnet from the information in its parent, and will run the validator's infrastructure with the specific configuration passed in the command.
Expand All @@ -89,7 +89,7 @@ This command will run the infrastructure for a Fendermint validator in the child
- `PRIVATE_KEY_PATH`: Path of the hex encoded private key for your validator (it should be the corresponding one used to join the subnet in the parent). This can be exported from the `ipc-cli` or any other wallet like Metamask.
- `SUBNET_ID`: SubnetID for the child subnet.
- `BOOTSTRAPS`: Comma separated list of bootstraps (or seeds in CometBFT parlance).
- `CMT_EXTERNAL_ADDR`: Address to advertise to peers for them to dial. If empty, will use the same as the default listening address from CometBFT (generally `0.0.0.0:<P2P_RPC_PORT>`).
- `CMT_P2P_EXTERNAL_ADDR`: Address to advertise to peers for them to dial. If empty, will use the same as the default listening address from CometBFT (generally `0.0.0.0:<P2P_RPC_PORT>`).
- `PARENT_ENDPOINT`: Public endpoint that the validator should use to connect to the parent.
- `PARENT_REGISTRY`: Ethereum address of the IPC registry contract in the parent.
- `PARENT_GATEWAY`: Ethereum address of the IPC gateway contract in the parent.
Expand All @@ -114,12 +114,11 @@ cargo make --makefile infra/Makefile.toml \
-e BOOTSTRAPS=<BOOTSTRAP_ENDPOINT>
-e PARENT_REGISTRY=<PARENT_REGISTRY_CONTRACT_ADDR> \
-e PARENT_GATEWAY=<GATEWAY_REGISTRY_CONTRACT_ADDR> \
-e CMT_EXTERNAL_ADDR=<COMETBFT_EXTERNAL_ENDPOINT> \
-e CMT_P2P_EXTERNAL_ADDR=<COMETBFT_EXTERNAL_ENDPOINT> \
child-fullnode
```
The full node also has its corresponding commands to kill and restart the node:
```
cargo make --makefile infra/Makefile.toml child-fullnode-down
cargo make --makefile infra/Makefile.toml child-fullnode-restart
```

8 changes: 8 additions & 0 deletions fendermint/app/settings/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,14 @@ pub struct SnapshotSettings {
/// How often to poll CometBFT to see whether it has caught up with the chain.
#[serde_as(as = "DurationSeconds<u64>")]
pub sync_poll_interval: Duration,
/// Temporary directory for downloads.
download_dir: Option<PathBuf>,
}

impl SnapshotSettings {
    /// Temporary directory used to stage incoming snapshot downloads.
    ///
    /// Returns the configured `download_dir` if one was set, otherwise
    /// falls back to the operating system's temp directory.
    pub fn download_dir(&self) -> PathBuf {
        // `unwrap_or_else` defers the `temp_dir()` call (which allocates a
        // fresh `PathBuf`) until we actually need the fallback, instead of
        // computing it eagerly even when `download_dir` is `Some`.
        self.download_dir.clone().unwrap_or_else(std::env::temp_dir)
    }
}

#[derive(Debug, Deserialize, Clone)]
Expand Down
61 changes: 43 additions & 18 deletions fendermint/app/src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -690,25 +690,18 @@ where
tendermint::Hash::None => return Err(anyhow!("empty block hash").into()),
};

let db = self.state_store_clone();
let state = self.committed_state()?;
let mut state_params = state.state_params.clone();

tracing::debug!(
height = block_height,
timestamp = request.header.time.unix_timestamp(),
app_hash = request.header.app_hash.to_string(),
//app_state_hash = to_app_hash(&state_params).to_string(), // should be the same as `app_hash`
"begin block"
);

let db = self.state_store_clone();
let state = self.committed_state()?;
let mut state_params = state.state_params.clone();

// Notify the snapshotter. We don't do this in `commit` because *this* is the height at which
// this state has been officially associated with the application hash, which is something
// we will receive in `offer_snapshot` and we can compare. If we did it in `commit` we'd
// have to associate the snapshot with `block_height + 1`. But this way we also know that
// others have agreed with our results.
if let Some(ref snapshots) = self.snapshots {
atomically(|| snapshots.notify(block_height as u64, state_params.clone())).await;
}

state_params.timestamp = to_timestamp(request.header.time);

let state = FvmExecState::new(db, self.multi_engine.as_ref(), block_height, state_params)
Expand Down Expand Up @@ -801,6 +794,13 @@ where
let app_hash = state.app_hash();
let block_height = state.block_height;

// Tell CometBFT how much of the block history it can forget.
let retain_height = if self.state_hist_size == 0 {
Default::default()
} else {
block_height.saturating_sub(self.state_hist_size)
};

tracing::debug!(
block_height,
state_root = state_root.to_string(),
Expand All @@ -824,27 +824,45 @@ where
// notified about), we could add it to the `ChainMessageInterpreter` as a constructor argument,
// a sort of "ambient state", and not worry about in in the `App` at all.

// Notify the snapshotter. It wasn't clear whether this should be done in `commit` or `begin_block`,
// that is, whether the _height_ of the snapshot should be `block_height` or `block_height+1`.
// When CometBFT calls `offer_snapshot` it sends an `app_hash` in it that we compare to the CID
// of the `state_params`. Based on end-to-end testing it looks like it gives the `app_hash` from
// the *next* block, so we have to do it here.
// For example:
// a) Notify in `begin_block`: say we are at committing block 899, then we notify in `begin_block`
// that block 900 has this state (so we use `block_height+1` in notification);
// CometBFT is going to offer it with the `app_hash` of block 901, which won't match, because
// by then the timestamp will be different in the state params after committing block 900.
// b) Notify in `commit`: say we are committing block 900 and notify immediately that it has this state
// (even though this state will only be available to query from the next height);
// CometBFT is going to offer it with the `app_hash` of 901, but in this case that's good, because
// that hash reflects the changes made by block 900, which this state param is the result of.
if let Some(ref snapshots) = self.snapshots {
atomically(|| snapshots.notify(block_height, state.state_params.clone())).await;
}

// Commit app state to the datastore.
self.set_committed_state(state)?;

// Reset check state.
let mut guard = self.check_state.lock().await;
*guard = None;

let response = response::Commit {
Ok(response::Commit {
data: app_hash.into(),
// We have to retain blocks until we can support Snapshots.
retain_height: Default::default(),
};
Ok(response)
retain_height: retain_height.try_into().expect("height is valid"),
})
}

/// List the snapshots available on this node to be served to remote peers.
async fn list_snapshots(&self) -> AbciResult<response::ListSnapshots> {
if let Some(ref client) = self.snapshots {
let snapshots = atomically(|| client.list_snapshots()).await;
tracing::info!(snapshot_count = snapshots.len(), "listing snaphots");
Ok(to_snapshots(snapshots)?)
} else {
tracing::info!("listing snaphots disabled");
Ok(Default::default())
}
}
Expand Down Expand Up @@ -882,6 +900,11 @@ where
request: request::OfferSnapshot,
) -> AbciResult<response::OfferSnapshot> {
if let Some(ref client) = self.snapshots {
tracing::info!(
height = request.snapshot.height.value(),
"received snapshot offer"
);

match from_snapshot(request).context("failed to parse snapshot") {
Ok(manifest) => {
tracing::info!(?manifest, "received snapshot offer");
Expand Down Expand Up @@ -955,6 +978,8 @@ where

// Now insert the new state into the history.
let mut state = self.committed_state()?;

// The height reflects that it was produced in `commit`.
state.block_height = snapshot.manifest.block_height;
state.state_params = snapshot.manifest.state_params;
self.set_committed_state(state)?;
Expand Down
17 changes: 10 additions & 7 deletions fendermint/app/src/cmd/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use fendermint_vm_interpreter::{
signed::SignedMessageInterpreter,
};
use fendermint_vm_resolver::ipld::IpldResolver;
use fendermint_vm_snapshot::SnapshotManager;
use fendermint_vm_snapshot::{SnapshotManager, SnapshotParams};
use fendermint_vm_topdown::proxy::IPCProviderProxy;
use fendermint_vm_topdown::sync::launch_polling_syncer;
use fendermint_vm_topdown::{CachedFinalityProvider, Toggle};
Expand Down Expand Up @@ -187,12 +187,15 @@ async fn run(settings: Settings) -> anyhow::Result<()> {
let snapshots = if settings.snapshots.enabled {
let (manager, client) = SnapshotManager::new(
state_store.clone(),
settings.snapshots_dir(),
settings.snapshots.block_interval,
settings.snapshots.chunk_size_bytes,
settings.snapshots.hist_size,
settings.snapshots.last_access_hold,
settings.snapshots.sync_poll_interval,
SnapshotParams {
snapshots_dir: settings.snapshots_dir(),
download_dir: settings.snapshots.download_dir(),
block_interval: settings.snapshots.block_interval,
chunk_size: settings.snapshots.chunk_size_bytes,
hist_size: settings.snapshots.hist_size,
last_access_hold: settings.snapshots.last_access_hold,
sync_poll_interval: settings.snapshots.sync_poll_interval,
},
)
.context("failed to create snapshot manager")?;

Expand Down
6 changes: 5 additions & 1 deletion fendermint/app/src/tmconv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,11 @@ pub fn from_snapshot(
let app_hash = to_app_hash(&metadata.state_params);

if app_hash != offer.app_hash {
bail!("the application hash does not match the metadata");
bail!(
"the application hash does not match the metadata; from-meta = {}, from-offer = {}",
app_hash,
offer.app_hash,
);
}

let checksum = tendermint::hash::Hash::try_from(offer.snapshot.hash)
Expand Down
Loading