From 36a920757a8360313afe1f1315c7d7278002dbbc Mon Sep 17 00:00:00 2001 From: Akosh Farkash Date: Mon, 11 Dec 2023 14:56:51 +0000 Subject: [PATCH 01/18] FM-363: Rename fullnode.env to node-1.env and make reusable tasks --- .../testing/snapshot-test/Makefile.toml | 95 ++++++++++++------- .../scripts/{fullnode.env => node-1.env} | 0 fendermint/testing/snapshot-test/src/lib.rs | 11 ++- 3 files changed, 67 insertions(+), 39 deletions(-) rename fendermint/testing/snapshot-test/scripts/{fullnode.env => node-1.env} (100%) diff --git a/fendermint/testing/snapshot-test/Makefile.toml b/fendermint/testing/snapshot-test/Makefile.toml index 0b09d560..a7a2cf39 100644 --- a/fendermint/testing/snapshot-test/Makefile.toml +++ b/fendermint/testing/snapshot-test/Makefile.toml @@ -12,12 +12,14 @@ extend = [ ] env_files = [ + # `snapshot.env` is the environment for `cargo make`. { path = "./scripts/snapshot.env" }, { path = "../Makefile/common.env" }, { path = "../Makefile/ci.env", profile = "ci" }, ] # Overriding the env file to enable snapshotting. +# This one is applied on every *container*. [tasks.test-data-env] script = """ cat << EOF > ${TEST_DATA_DIR}/.env @@ -30,9 +32,15 @@ FM_SNAPSHOTS__SYNC_POLL_INTERVAL=10 EOF """ +# This is the test workflow [tasks.test] clear = true -dependencies = ["snapshot-wait", "snapshot-created", "fullnode-sync"] +run_task = { name = [ + "snapshot-wait", + "snapshot-created", + "node-1-setup", + "node-1-sync-test", +], fork = true, cleanup_task = "snapshot-teardown" } # Wait enough time that some snapshots should be exported. [tasks.snapshot-wait] @@ -49,52 +57,56 @@ if [ -z "$(ls -A $FM_SNAPSOTS_DIR)" ]; then fi """ - -# Set up a full node that syncs with the default one, then stop it. -[tasks.fullnode-sync] -run_task = { name = [ - "fullnode-setup", - "fullnode-test", -], fork = true, cleanup_task = "fullnode-teardown" } - - -[tasks.fullnode-setup] -dependencies = ["cometbft-export-node-id"] -env_files = [{ path = "./scripts/fullnode.env" }] +# Shut down all non-default nodes. +[tasks.snapshot-teardown] run_task = { name = [ - "test-node-dir", - "cometbft-init", - "fullnode-set-seed", - "fullnode-copy-genesis", - "fendermint-start", - "cometbft-start", - "cometbft-wait", - "fendermint-logs", - "cometbft-logs", + "node-1-teardown", + #"node-2-teardown", ] } +# ### General tasks for node-1 and node-2 + # Set the persistent peer address to that of the default node-0. -[tasks.fullnode-set-seed] -env_files = [{ path = "./scripts/fullnode.env" }] +[tasks.node-set-seed] +env_files = [{ path = "./scripts/node-1.env" }] script = """ CMT_SEED_ID=$(cat $BASE_DIR/$SEED_NODE_NAME/node-id) CMT_PERSISTENT_PEERS=$CMT_SEED_ID@$SEED_CMT_CONTAINER_NAME:26656 sed -i'' -e "s|persistent_peers = \\"\\"|persistent_peers = \\"$CMT_PERSISTENT_PEERS\\"|" $BASE_DIR/${NODE_NAME}/cometbft/config/config.toml """ - # Get the genesis from node-0 -[tasks.fullnode-copy-genesis] -env_files = [{ path = "./scripts/fullnode.env" }] +[tasks.node-copy-genesis] script = """ cp $BASE_DIR/${SEED_NODE_NAME}/cometbft/config/genesis.json \ $BASE_DIR/${NODE_NAME}/cometbft/config/genesis.json """ +[tasks.node-setup] +# Export node-0 ID. +dependencies = ["cometbft-export-node-id"] +run_task = { name = [ + "test-node-dir", + "cometbft-init", + "node-set-seed", + "node-copy-genesis", + "fendermint-start", + "cometbft-start", + "cometbft-wait", + "fendermint-logs", + "cometbft-logs", +] } + +[tasks.node-teardown] +run_task = { name = [ + "cometbft-destroy", + "fendermint-destroy", + "test-node-dir-rm", +] } + # See if it managed to sync. -[tasks.fullnode-test] -env_files = [{ path = "./scripts/fullnode.env" }] +[tasks.node-sync-test] script = """ EARLIEST=$(curl -s localhost:${CMT_RPC_HOST_PORT}/status | jq -r ".result.sync_info.earliest_block_height") LATEST=$(curl -s localhost:${CMT_RPC_HOST_PORT}/status | jq -r ".result.sync_info.latest_block_height") @@ -105,11 +117,22 @@ if [ "$EARLIEST" = "$LATEST" ]; then fi """ +# ### node-1 tasks -[tasks.fullnode-teardown] -env_files = [{ path = "./scripts/fullnode.env" }] -run_task = { name = [ - "cometbft-destroy", - "fendermint-destroy", - "test-node-dir-rm", -] } +[tasks.node-1-setup] +env_files = [{ path = "./scripts/node-1.env" }] +extend = "node-setup" + +[tasks.node-1-teardown] +env_files = [{ path = "./scripts/node-1.env" }] +extend = "node-teardown" + +[tasks.node-1-sync-test] +env_files = [{ path = "./scripts/node-1.env" }] +extend = "node-sync-test" + +# ### node-2 tasks + +# [tasks.node-2-teardown] +# extend = "node-teardown" +# env_files = [{ path = "./scripts/node-2.env" }] diff --git a/fendermint/testing/snapshot-test/scripts/fullnode.env b/fendermint/testing/snapshot-test/scripts/node-1.env similarity index 100% rename from fendermint/testing/snapshot-test/scripts/fullnode.env rename to fendermint/testing/snapshot-test/scripts/node-1.env diff --git a/fendermint/testing/snapshot-test/src/lib.rs b/fendermint/testing/snapshot-test/src/lib.rs index 43d95313..4e0fe300 100644 --- a/fendermint/testing/snapshot-test/src/lib.rs +++ b/fendermint/testing/snapshot-test/src/lib.rs @@ -1,8 +1,13 @@ // Copyright 2022-2023 Protocol Labs // SPDX-License-Identifier: Apache-2.0, MIT -//! Run tests against multiple Fendermint+CometBFT docker container pairs locally, -//! where one is allowed to run for a while and export some snapshots, then another -//! is started to sync its state directly with it. +//! Run tests against multiple Fendermint+CometBFT docker container pairs locally: +//! 0. The default `snapshot-fendermint` and `snapshot-cometbft` pair +//! 1. A `snapshot-fendermint-1` and `snapshot-cometbft-1` using `scripts/node-1.env`, +//! which sync with the default node from genesis on a block-by-block basis. +//! 2. A `snapshot-fendermint-2` and `snapshot-cometbft-2` using `scripts/node-2.env`, +//! which syncs with `node-0` and `node-1` using snapshots (a.k.a. state sync). +//! +//! Note that CometBFT state sync requires 2 RPC servers, which is why we need 3 nodes. //! //! Example: //! From f898de7a29b21fa3eb51ddd7b73c263845a1adde Mon Sep 17 00:00:00 2001 From: Akosh Farkash Date: Mon, 11 Dec 2023 15:24:03 +0000 Subject: [PATCH 02/18] FM-363: Add a second full node --- .../testing/snapshot-test/Makefile.toml | 26 +++++++++++++++---- .../testing/snapshot-test/scripts/node-1.env | 1 - .../testing/snapshot-test/scripts/node-2.env | 9 +++++++ infra/scripts/cometbft.toml | 1 - 4 files changed, 30 insertions(+), 7 deletions(-) create mode 100644 fendermint/testing/snapshot-test/scripts/node-2.env diff --git a/fendermint/testing/snapshot-test/Makefile.toml b/fendermint/testing/snapshot-test/Makefile.toml index a7a2cf39..73d918eb 100644 --- a/fendermint/testing/snapshot-test/Makefile.toml +++ b/fendermint/testing/snapshot-test/Makefile.toml @@ -20,6 +20,8 @@ env_files = [ # Overriding the env file to enable snapshotting. # This one is applied on every *container*. +# The other env files are for `cargo make` itself, +# the values are only available inside TOML files. [tasks.test-data-env] script = """ cat << EOF > ${TEST_DATA_DIR}/.env @@ -29,6 +31,8 @@ FM_SNAPSHOTS__BLOCK_INTERVAL=10 FM_SNAPSHOTS__HIST_SIZE=10 FM_SNAPSHOTS__CHUNK_SIZE_BYTES=1048576 FM_SNAPSHOTS__SYNC_POLL_INTERVAL=10 +CMT_PEX=true +CMT_MAX_NUM_OUTBOUND_PEERS=2 EOF """ @@ -40,6 +44,8 @@ run_task = { name = [ "snapshot-created", "node-1-setup", "node-1-sync-test", + "node-2-setup", + "node-2-sync-test", ], fork = true, cleanup_task = "snapshot-teardown" } # Wait enough time that some snapshots should be exported. @@ -61,7 +67,7 @@ fi [tasks.snapshot-teardown] run_task = { name = [ "node-1-teardown", - #"node-2-teardown", + "node-2-teardown", ] } @@ -69,7 +75,6 @@ run_task = { name = [ # Set the persistent peer address to that of the default node-0. [tasks.node-set-seed] -env_files = [{ path = "./scripts/node-1.env" }] script = """ CMT_SEED_ID=$(cat $BASE_DIR/$SEED_NODE_NAME/node-id) CMT_PERSISTENT_PEERS=$CMT_SEED_ID@$SEED_CMT_CONTAINER_NAME:26656 @@ -133,6 +138,17 @@ extend = "node-sync-test" # ### node-2 tasks -# [tasks.node-2-teardown] -# extend = "node-teardown" -# env_files = [{ path = "./scripts/node-2.env" }] +# TODO: Get a trusted height and block hash from node-0, something that has a snapshot. +# TODO: Configure node-2 to use the RPC addresses of node-0 and node-1 for state sync. +# TODO: Make sure when node-2 starts it's unable to sync from genesis. +[tasks.node-2-setup] +env_files = [{ path = "./scripts/node-2.env" }] +extend = "node-setup" + +[tasks.node-2-teardown] +env_files = [{ path = "./scripts/node-2.env" }] +extend = "node-teardown" + +[tasks.node-2-sync-test] +env_files = [{ path = "./scripts/node-2.env" }] +extend = "node-sync-test" diff --git a/fendermint/testing/snapshot-test/scripts/node-1.env b/fendermint/testing/snapshot-test/scripts/node-1.env index b6a21f47..8b7c9204 100644 --- a/fendermint/testing/snapshot-test/scripts/node-1.env +++ b/fendermint/testing/snapshot-test/scripts/node-1.env @@ -6,5 +6,4 @@ CMT_CONTAINER_NAME=snapshot-cometbft-1 CMT_DIR=${TEST_DATA_DIR}/${NODE_NAME}/cometbft CMT_P2P_HOST_PORT=26666 CMT_RPC_HOST_PORT=26667 -CMT_MAX_NUM_OUTBOUND_PEERS=1 CMT_WAIT_MILLIS=20000 diff --git a/fendermint/testing/snapshot-test/scripts/node-2.env b/fendermint/testing/snapshot-test/scripts/node-2.env new file mode 100644 index 00000000..684ad1f6 --- /dev/null +++ b/fendermint/testing/snapshot-test/scripts/node-2.env @@ -0,0 +1,9 @@ +SEED_NODE_NAME=node-0 +SEED_CMT_CONTAINER_NAME=snapshot-cometbft +NODE_NAME=node-2 +FM_CONTAINER_NAME=snapshot-fendermint-2 +CMT_CONTAINER_NAME=snapshot-cometbft-2 +CMT_DIR=${TEST_DATA_DIR}/${NODE_NAME}/cometbft +CMT_P2P_HOST_PORT=26676 +CMT_RPC_HOST_PORT=26677 +CMT_WAIT_MILLIS=20000 diff --git a/infra/scripts/cometbft.toml b/infra/scripts/cometbft.toml index b34b8ba8..811693a8 100644 --- a/infra/scripts/cometbft.toml +++ b/infra/scripts/cometbft.toml @@ -26,7 +26,6 @@ docker run \ --volume ${CMT_DIR}:/cometbft \ --env-file ${ENV_FILE} \ --env CMT_PROXY_APP=tcp://${FM_CONTAINER_NAME}:26658 \ - --env CMT_PEX=false \ --env CMT_MAX_SUBSCRIPTION_CLIENTS=10 \ --env CMT_MAX_SUBSCRIPTIONS_PER_CLIENT=1000 \ ${CMT_DOCKER_IMAGE} \ From dbe7f6d670240efc1ecc0414480ca3ebce1c1cfd Mon Sep 17 00:00:00 2001 From: Akosh Farkash Date: Tue, 12 Dec 2023 10:09:51 +0000 Subject: [PATCH 03/18] FM-363: Set retention height --- fendermint/app/src/app.rs | 15 +++++++---- .../testing/snapshot-test/Makefile.toml | 27 +++++++++++++++++-- .../testing/snapshot-test/scripts/node-1.env | 1 + .../testing/snapshot-test/scripts/node-2.env | 1 + 4 files changed, 37 insertions(+), 7 deletions(-) diff --git a/fendermint/app/src/app.rs b/fendermint/app/src/app.rs index 82bc463e..a9eeedfa 100644 --- a/fendermint/app/src/app.rs +++ b/fendermint/app/src/app.rs @@ -801,6 +801,13 @@ where let app_hash = state.app_hash(); let block_height = state.block_height; + // Tell CometBFT how much of the block history it can forget. + let retain_height = if self.state_hist_size == 0 { + Default::default() + } else { + block_height.saturating_sub(self.state_hist_size) + }; + tracing::debug!( block_height, state_root = state_root.to_string(), @@ -831,12 +838,10 @@ where let mut guard = self.check_state.lock().await; *guard = None; - let response = response::Commit { + Ok(response::Commit { data: app_hash.into(), - // We have to retain blocks until we can support Snapshots. - retain_height: Default::default(), - }; - Ok(response) + retain_height: retain_height.try_into().expect("height is valid"), + }) } /// List the snapshots available on this node to be served to remote peers. diff --git a/fendermint/testing/snapshot-test/Makefile.toml b/fendermint/testing/snapshot-test/Makefile.toml index 73d918eb..f33363f6 100644 --- a/fendermint/testing/snapshot-test/Makefile.toml +++ b/fendermint/testing/snapshot-test/Makefile.toml @@ -25,7 +25,6 @@ env_files = [ [tasks.test-data-env] script = """ cat << EOF > ${TEST_DATA_DIR}/.env -FM_DB__HIST_SIZE=100 FM_SNAPSHOTS__ENABLED=true FM_SNAPSHOTS__BLOCK_INTERVAL=10 FM_SNAPSHOTS__HIST_SIZE=10 @@ -33,6 +32,7 @@ FM_SNAPSHOTS__CHUNK_SIZE_BYTES=1048576 FM_SNAPSHOTS__SYNC_POLL_INTERVAL=10 CMT_PEX=true CMT_MAX_NUM_OUTBOUND_PEERS=2 +CMT_STATESYNC__ENABLE=true EOF """ @@ -93,6 +93,7 @@ cp $BASE_DIR/${SEED_NODE_NAME}/cometbft/config/genesis.json \ dependencies = ["cometbft-export-node-id"] run_task = { name = [ "test-node-dir", + "node-env", "cometbft-init", "node-set-seed", "node-copy-genesis", @@ -122,8 +123,31 @@ if [ "$EARLIEST" = "$LATEST" ]; then fi """ +# Tell node-1 to prune history, and node-2 to statesync from node-0 or node-1 +[tasks.node-env] +script = """ +cat ${TEST_DATA_DIR}/.env > ${TEST_DATA_DIR}/${NODE_NAME}/.env + +if [ $NODE_NAME = "node-1" ]; then +cat << EOL >> ${TEST_DATA_DIR}/${NODE_NAME}/.env +FM_DB__HIST_SIZE=100 +EOL +fi + +if [ $NODE_NAME = "node-2" ]; then +cat << EOL >> ${TEST_DATA_DIR}/${NODE_NAME}/.env +FM_DB__HIST_SIZE=100 +CMT_STATESYNC__RPC_SERVERS="http://snapshot-cometbft:26657,http://snapshot-cometbft-1:26657" +EOL +fi +""" + # ### node-1 tasks +[tasks.node-1-env] +env_files = [{ path = "./scripts/node-1.env" }] +extend = "node-env" + [tasks.node-1-setup] env_files = [{ path = "./scripts/node-1.env" }] extend = "node-setup" @@ -139,7 +163,6 @@ extend = "node-sync-test" # ### node-2 tasks # TODO: Get a trusted height and block hash from node-0, something that has a snapshot. -# TODO: Configure node-2 to use the RPC addresses of node-0 and node-1 for state sync. # TODO: Make sure when node-2 starts it's unable to sync from genesis. [tasks.node-2-setup] env_files = [{ path = "./scripts/node-2.env" }] diff --git a/fendermint/testing/snapshot-test/scripts/node-1.env b/fendermint/testing/snapshot-test/scripts/node-1.env index 8b7c9204..7cd31b90 100644 --- a/fendermint/testing/snapshot-test/scripts/node-1.env +++ b/fendermint/testing/snapshot-test/scripts/node-1.env @@ -1,6 +1,7 @@ SEED_NODE_NAME=node-0 SEED_CMT_CONTAINER_NAME=snapshot-cometbft NODE_NAME=node-1 +ENV_FILE=${TEST_DATA_DIR}/${NODE_NAME}/.env FM_CONTAINER_NAME=snapshot-fendermint-1 CMT_CONTAINER_NAME=snapshot-cometbft-1 CMT_DIR=${TEST_DATA_DIR}/${NODE_NAME}/cometbft diff --git a/fendermint/testing/snapshot-test/scripts/node-2.env b/fendermint/testing/snapshot-test/scripts/node-2.env index 684ad1f6..b4fd4db5 100644 --- a/fendermint/testing/snapshot-test/scripts/node-2.env +++ b/fendermint/testing/snapshot-test/scripts/node-2.env @@ -1,6 +1,7 @@ SEED_NODE_NAME=node-0 SEED_CMT_CONTAINER_NAME=snapshot-cometbft NODE_NAME=node-2 +ENV_FILE=${TEST_DATA_DIR}/${NODE_NAME}/.env FM_CONTAINER_NAME=snapshot-fendermint-2 CMT_CONTAINER_NAME=snapshot-cometbft-2 CMT_DIR=${TEST_DATA_DIR}/${NODE_NAME}/cometbft From c2c9b16b5b911bc7bf3afb262b32da9c19f0626f Mon Sep 17 00:00:00 2001 From: Akosh Farkash Date: Tue, 12 Dec 2023 10:37:26 +0000 Subject: [PATCH 04/18] FM-363: Set trusted height --- .../testing/snapshot-test/Makefile.toml | 27 ++++++++++++------- fendermint/testing/snapshot-test/src/lib.rs | 2 ++ 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/fendermint/testing/snapshot-test/Makefile.toml b/fendermint/testing/snapshot-test/Makefile.toml index f33363f6..93079172 100644 --- a/fendermint/testing/snapshot-test/Makefile.toml +++ b/fendermint/testing/snapshot-test/Makefile.toml @@ -123,21 +123,28 @@ if [ "$EARLIEST" = "$LATEST" ]; then fi """ -# Tell node-1 to prune history, and node-2 to statesync from node-0 or node-1 +# Tell node-2 to statesync from node-0 or node-1 +# Setting the trusted height to 10 and fetching its block hash (which appears in the header @ 11), +# so that it can do whatever it wants after that. +# Not telling any of the nodes to prune their history with `FM_DB__STATE_HIST_SIZE=100` for example, +# so that we can repeat this test any time (node-1 needs to sync from genesis of node-0) and also +# so that we can check that node-2 will not have a full history of blocks, which should only be the +# case if it synced from a snapshot; if it was also told to prune the data then that test would be +# inconclusive. +# Alternatively we could set up 4 nodes: node-0 no pruning, node-1 and node-2 boot from node-0 and prune, node-3 boot from node-1 and node-2, +# however it's unclear whether node-3 would discover node-0 through PEX and boot from there, and also the setting of a trusted height would shift. [tasks.node-env] script = """ cat ${TEST_DATA_DIR}/.env > ${TEST_DATA_DIR}/${NODE_NAME}/.env -if [ $NODE_NAME = "node-1" ]; then -cat << EOL >> ${TEST_DATA_DIR}/${NODE_NAME}/.env -FM_DB__HIST_SIZE=100 -EOL -fi - if [ $NODE_NAME = "node-2" ]; then + +TRUST_HASH=$(curl -s "http://localhost:26657/header?height=11" | jq -r ".result.header.last_block_id.hash") + cat << EOL >> ${TEST_DATA_DIR}/${NODE_NAME}/.env -FM_DB__HIST_SIZE=100 -CMT_STATESYNC__RPC_SERVERS="http://snapshot-cometbft:26657,http://snapshot-cometbft-1:26657" +CMT_STATESYNC_RPC_SERVERS="http://snapshot-cometbft:26657,http://snapshot-cometbft-1:26657" +CMT_STATESYNC_TRUST_HEIGHT=10 +CMT_STATESYNC_TRUST_HASH=$TRUST_HASH EOL fi """ @@ -163,7 +170,7 @@ extend = "node-sync-test" # ### node-2 tasks # TODO: Get a trusted height and block hash from node-0, something that has a snapshot. -# TODO: Make sure when node-2 starts it's unable to sync from genesis. +# TODO: Test that node-2 did not sync from genesis. [tasks.node-2-setup] env_files = [{ path = "./scripts/node-2.env" }] extend = "node-setup" diff --git a/fendermint/testing/snapshot-test/src/lib.rs b/fendermint/testing/snapshot-test/src/lib.rs index 4e0fe300..e8486754 100644 --- a/fendermint/testing/snapshot-test/src/lib.rs +++ b/fendermint/testing/snapshot-test/src/lib.rs @@ -9,6 +9,8 @@ //! //! Note that CometBFT state sync requires 2 RPC servers, which is why we need 3 nodes. //! +//! See and +//! //! Example: //! //! ```text From 8d77436f43dd4b16254b53c3cfe29f35500aa243 Mon Sep 17 00:00:00 2001 From: Akosh Farkash Date: Tue, 12 Dec 2023 11:03:31 +0000 Subject: [PATCH 05/18] FM-363: Fix CMT_ prefixes, add RPC or P2P where necessary --- docs/ipc.md | 5 ++--- .../testing/snapshot-test/Makefile.toml | 2 +- infra/docker-compose.yml | 4 ++-- infra/scripts/testnet.toml | 22 +++++++++---------- 4 files changed, 16 insertions(+), 17 deletions(-) diff --git a/docs/ipc.md b/docs/ipc.md index ba58e3e5..ea3f39b1 100644 --- a/docs/ipc.md +++ b/docs/ipc.md @@ -24,7 +24,7 @@ cargo make --makefile infra/Makefile.toml \ -e BOOTSTRAPS= -e PARENT_REGISTRY= \ -e PARENT_GATEWAY= \ - -e CMT_EXTERNAL_ADDR= \ + -e CMT_P2P_EXTERNAL_ADDR= \ bootstrap ``` You'll see that by the end of the output, this command should output the network address of your bootstrap. You can use this endpoint to include this bootstrap node as a seed in the `seeds` configuration of CometBFT. @@ -47,7 +47,7 @@ cargo make --makefile infra/Makefile.toml \ - `SUBNET_ID`: SubnetID the bootstrap is operating in. - `NODE_NAME` (optional): Node name information to attach to the containers of the deployment. This will be needed to deploy more than one bootstrap in the same local environment. - `BOOTSTRAPS`: Comma separated list of bootstraps (or seeds in CometBFT parlance) that we want this bootstrap to also be connected to. -- `CMT_EXTERNAL_ADDR`: Address to advertise to peers for them to dial. If empty, will use the same as the default listening address from CometBFT (generally `0.0.0.0:`). +- `CMT_P2P_EXTERNAL_ADDR`: Address to advertise to peers for them to dial. If empty, will use the same as the default listening address from CometBFT (generally `0.0.0.0:`). - `PARENT_ENDPOINT`: Public endpoint that the validator should use to connect to the parent. - `PARENT_REGISTRY`: Ethereum address of the IPC registry contract in the parent - `PARENT_GATEWAY`: Ethereum address of the IPC gateway contract in the parent. @@ -122,4 +122,3 @@ The full node also has its corresponding commands to kill and restart the node: cargo make --makefile infra/Makefile.toml child-fullnode-down cargo make --makefile infra/Makefile.toml child-fullnode-restart ``` - diff --git a/fendermint/testing/snapshot-test/Makefile.toml b/fendermint/testing/snapshot-test/Makefile.toml index 93079172..5e5a31f9 100644 --- a/fendermint/testing/snapshot-test/Makefile.toml +++ b/fendermint/testing/snapshot-test/Makefile.toml @@ -31,7 +31,7 @@ FM_SNAPSHOTS__HIST_SIZE=10 FM_SNAPSHOTS__CHUNK_SIZE_BYTES=1048576 FM_SNAPSHOTS__SYNC_POLL_INTERVAL=10 CMT_PEX=true -CMT_MAX_NUM_OUTBOUND_PEERS=2 +CMT_P2P_MAX_NUM_OUTBOUND_PEERS=2 CMT_STATESYNC__ENABLE=true EOF """ diff --git a/infra/docker-compose.yml b/infra/docker-compose.yml index 3cb19357..8a1489f4 100644 --- a/infra/docker-compose.yml +++ b/infra/docker-compose.yml @@ -24,9 +24,9 @@ services: environment: - ID=${NODE_ID} - LOG=${LOG:-cometbft-node${NODE_ID}.log} - - CMT_PEX=true - CMT_PROXY_APP=tcp://fendermint-node${NODE_ID}:26658 - - CMT_PERSISTENT_PEERS="${CMT_PERSISTENT_PEERS}" + - CMT_P2P_PEX=true + - CMT_P2P_PERSISTENT_PEERS="${CMT_P2P_PERSISTENT_PEERS}" volumes: - $BASE_DIR/node${NODE_ID}/cometbft:/cometbft healthcheck: diff --git a/infra/scripts/testnet.toml b/infra/scripts/testnet.toml index 974b2685..20519960 100644 --- a/infra/scripts/testnet.toml +++ b/infra/scripts/testnet.toml @@ -6,7 +6,7 @@ dependencies = [ "testnet-down", "testnet-init", "fendermint-deps", - "testnet-up" + "testnet-up", ] [tasks.testnet-up] @@ -15,7 +15,7 @@ if [ -z $GID ]; then GID=$(id -g); fi if [ -z $UID ]; then UID=$(id -u); fi export UID export GID -export CMT_PERSISTENT_PEERS=`cat $BASE_DIR/peers` +export CMT_P2P_PERSISTENT_PEERS=`cat $BASE_DIR/peers` export SUBNET_ID=$SUBNET_ID export BASE_DIR=$BASE_DIR ./infra/run.sh start @@ -23,7 +23,7 @@ export BASE_DIR=$BASE_DIR [tasks.testnet-down] script = """ -export CMT_PERSISTENT_PEERS="UNDEFINED" +export CMT_P2P_PERSISTENT_PEERS="UNDEFINED" if [ -z $GID ]; then GID=$(id -g); fi if [ -z $UID ]; then UID=$(id -u); fi export UID @@ -78,19 +78,19 @@ release ${nodes} """ [tasks.testnet-clear] -script=""" +script = """ echo clearing all IPC data rm -rf ${BASE_DIR} """ [tasks.testnet-mkdir] -script=""" +script = """ mkdir -p ${BASE_DIR} """ [tasks.testnet-cometbft-init] extend = "cometbft-init" -env = { "CMD" = "init", "NETWORK_NAME"="${NETWORK_NAME}", "CMT_DIR" = "${BASE_DIR}/${NODE_NAME}/cometbft", "CMT_CONTAINER_NAME" = "cometbft-node${NUMBER}", "FLAGS" = "-a STDOUT -a STDERR --rm"} +env = { "CMD" = "init", "NETWORK_NAME" = "${NETWORK_NAME}", "CMT_DIR" = "${BASE_DIR}/${NODE_NAME}/cometbft", "CMT_CONTAINER_NAME" = "cometbft-node${NUMBER}", "FLAGS" = "-a STDOUT -a STDERR --rm" } [tasks.testnet-add-peer] extend = "fendermint-tool" @@ -101,13 +101,13 @@ env = { "ENTRY" = "fendermint", "CMD" = """key add-peer \ """ } [tasks.testnet-setup-persistent-peers] -script=""" -unset CMT_PERSISTENT_PEERS -export CMT_PERSISTENT_PEERS=`cat $BASE_DIR/peers` -echo Persistent peers: $CMT_PERSISTENT_PEERS +script = """ +unset CMT_P2P_PERSISTENT_PEERS +export CMT_P2P_PERSISTENT_PEERS=`cat $BASE_DIR/peers` +echo Persistent peers: $CMT_P2P_PERSISTENT_PEERS for i in $(seq 0 3); do - sed -i'' -e "s|persistent_peers = \\"\\"|persistent_peers = \\"$CMT_PERSISTENT_PEERS\\"|" $BASE_DIR/node${i}/cometbft/config/config.toml + sed -i'' -e "s|persistent_peers = \\"\\"|persistent_peers = \\"$CMT_P2P_PERSISTENT_PEERS\\"|" $BASE_DIR/node${i}/cometbft/config/config.toml done """ From 74d10a1d5ad5e08ca74925ab7ed82c3a4d125582 Mon Sep 17 00:00:00 2001 From: Akosh Farkash Date: Tue, 12 Dec 2023 11:11:07 +0000 Subject: [PATCH 06/18] FM-363: Fix CMT_ prefixes, add RPC or P2P where necessary --- docs/ipc.md | 6 +++--- fendermint/testing/smoke-test/Makefile.toml | 8 ++++++++ infra/Makefile.toml | 2 +- infra/scripts/cometbft.toml | 6 +++--- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/docs/ipc.md b/docs/ipc.md index ea3f39b1..9f567767 100644 --- a/docs/ipc.md +++ b/docs/ipc.md @@ -76,7 +76,7 @@ cargo make --makefile infra/Makefile.toml \ -e BOOTSTRAPS= -e PARENT_REGISTRY= \ -e PARENT_GATEWAY= \ - -e CMT_EXTERNAL_ADDR= \ + -e CMT_P2P_EXTERNAL_ADDR= \ child-validator ``` This command will run the infrastructure for a Fendermint validator in the child subnet. It will generate the genesis of the subnet from the information in its parent, and will run the validator's infrastructure with the specific configuration passed in the command. @@ -89,7 +89,7 @@ This command will run the infrastructure for a Fendermint validator in the child - `PRIVATE_KEY_PATH`: Path of the hex encoded private key for your validator (it should be the corresponding one used to join the subnet in the parent). This can be exported from the `ipc-cli` or any other wallet like Metamask. - `SUBNET_ID`: SubnetID for the child subnet. - `BOOTSTRAPS`: Comma separated list of bootstraps (or seeds in CometBFT parlance). -- `CMT_EXTERNAL_ADDR`: Address to advertise to peers for them to dial. If empty, will use the same as the default listening address from CometBFT (generally `0.0.0.0:`). +- `CMT_P2P_EXTERNAL_ADDR`: Address to advertise to peers for them to dial. If empty, will use the same as the default listening address from CometBFT (generally `0.0.0.0:`). - `PARENT_ENDPOINT`: Public endpoint that the validator should use to connect to the parent. - `PARENT_REGISTRY`: Ethereum address of the IPC registry contract in the parent - `PARENT_GATEWAY`: Ethereum address of the IPC gateway contract in the parent. @@ -114,7 +114,7 @@ cargo make --makefile infra/Makefile.toml \ -e BOOTSTRAPS= -e PARENT_REGISTRY= \ -e PARENT_GATEWAY= \ - -e CMT_EXTERNAL_ADDR= \ + -e CMT_P2P_EXTERNAL_ADDR= \ child-fullnode ``` The full node also has its corresponding commands to kill and restart the node: diff --git a/fendermint/testing/smoke-test/Makefile.toml b/fendermint/testing/smoke-test/Makefile.toml index 236d0690..835806f1 100644 --- a/fendermint/testing/smoke-test/Makefile.toml +++ b/fendermint/testing/smoke-test/Makefile.toml @@ -20,6 +20,14 @@ env_files = [ { path = "../Makefile/ci.env", profile = "ci" }, ] +# Overriding the env file to enable state pruning. +[tasks.test-data-env] +script = """ +cat << EOF > ${TEST_DATA_DIR}/.env +FM_DB__STATE_HIST_SIZE=30 +EOF +""" + [tasks.test] clear = true dependencies = ["simplecoin-example", "ethapi-example"] diff --git a/infra/Makefile.toml b/infra/Makefile.toml index 91a979c1..fc4b3a79 100644 --- a/infra/Makefile.toml +++ b/infra/Makefile.toml @@ -19,7 +19,7 @@ SUBNET_ID = { value = "/r0", condition = { env_not_set = ["SUBNET_ID"] } } # The network name is derived from the SUBNET_ID, replacing slashes with dashes, and dropping the first dash if any. NETWORK_NAME = { script = ["echo $SUBNET_ID | sed -e 's|/|-|g' -e 's|^-||1'"] } # External P2P address advertised by CometBFT to other peers. -CMT_EXTERNAL_ADDR = { value = "", condition = { env_not_set = ["CMT_EXTERNAL_ADDR"] } } +CMT_P2P_EXTERNAL_ADDR = { value = "", condition = { env_not_set = ["CMT_P2P_EXTERNAL_ADDR"] } } BALANCE = { value = "1000", condition = { env_not_set = ["BALANCE"] } } BASE_FEE = { value = "1000", condition = { env_not_set = ["BASE_FEE"] } } diff --git a/infra/scripts/cometbft.toml b/infra/scripts/cometbft.toml index 811693a8..085db788 100644 --- a/infra/scripts/cometbft.toml +++ b/infra/scripts/cometbft.toml @@ -26,8 +26,8 @@ docker run \ --volume ${CMT_DIR}:/cometbft \ --env-file ${ENV_FILE} \ --env CMT_PROXY_APP=tcp://${FM_CONTAINER_NAME}:26658 \ - --env CMT_MAX_SUBSCRIPTION_CLIENTS=10 \ - --env CMT_MAX_SUBSCRIPTIONS_PER_CLIENT=1000 \ + --env CMT_RPC_MAX_SUBSCRIPTION_CLIENTS=10 \ + --env CMT_RPC_MAX_SUBSCRIPTIONS_PER_CLIENT=1000 \ ${CMT_DOCKER_IMAGE} \ ${CMD} """ @@ -196,7 +196,7 @@ sed -i'' -e "s/{{PLACEHOLDER}}/$BOOTSTRAPS/g" ${CMT_DIR}/config/config.toml [tasks.set-external-addr] script = """ sed -i'' -e 's|^external_address = ""$|external_address = "{{PLACEHOLDER}}"|g' ${CMT_DIR}/config/config.toml -sed -i'' -e "s/{{PLACEHOLDER}}/$CMT_EXTERNAL_ADDR/g" ${CMT_DIR}/config/config.toml +sed -i'' -e "s/{{PLACEHOLDER}}/$CMT_P2P_EXTERNAL_ADDR/g" ${CMT_DIR}/config/config.toml """ # This is required to run several validators locally. You may want to disable it when running From ba2c2c7159cefe01f4d2f3b1ce6faf793bc660b4 Mon Sep 17 00:00:00 2001 From: Akosh Farkash Date: Tue, 12 Dec 2023 11:11:21 +0000 Subject: [PATCH 07/18] FM-363: Enable state pruning in smoke tests --- fendermint/testing/snapshot-test/Makefile.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fendermint/testing/snapshot-test/Makefile.toml b/fendermint/testing/snapshot-test/Makefile.toml index 5e5a31f9..8bb0bac3 100644 --- a/fendermint/testing/snapshot-test/Makefile.toml +++ b/fendermint/testing/snapshot-test/Makefile.toml @@ -30,7 +30,7 @@ FM_SNAPSHOTS__BLOCK_INTERVAL=10 FM_SNAPSHOTS__HIST_SIZE=10 FM_SNAPSHOTS__CHUNK_SIZE_BYTES=1048576 FM_SNAPSHOTS__SYNC_POLL_INTERVAL=10 -CMT_PEX=true +CMT_P2P_PEX=true CMT_P2P_MAX_NUM_OUTBOUND_PEERS=2 CMT_STATESYNC__ENABLE=true EOF @@ -77,8 +77,8 @@ run_task = { name = [ [tasks.node-set-seed] script = """ CMT_SEED_ID=$(cat $BASE_DIR/$SEED_NODE_NAME/node-id) -CMT_PERSISTENT_PEERS=$CMT_SEED_ID@$SEED_CMT_CONTAINER_NAME:26656 -sed -i'' -e "s|persistent_peers = \\"\\"|persistent_peers = \\"$CMT_PERSISTENT_PEERS\\"|" $BASE_DIR/${NODE_NAME}/cometbft/config/config.toml +CMT_P2P_PERSISTENT_PEERS=$CMT_SEED_ID@$SEED_CMT_CONTAINER_NAME:26656 +sed -i'' -e "s|persistent_peers = \\"\\"|persistent_peers = \\"$CMT_P2P_PERSISTENT_PEERS\\"|" $BASE_DIR/${NODE_NAME}/cometbft/config/config.toml """ # Get the genesis from node-0 From acfaa7c4fcd591b43249c5f398e6e2538bd82cbd Mon Sep 17 00:00:00 2001 From: Akosh Farkash Date: Tue, 12 Dec 2023 12:19:56 +0000 Subject: [PATCH 08/18] FM-363: Try a diamond setup --- fendermint/testing/smoke-test/Makefile.toml | 8 -- .../testing/snapshot-test/Makefile.toml | 97 ++++++++++--------- .../testing/snapshot-test/scripts/node-1.env | 4 +- .../testing/snapshot-test/scripts/node-2.env | 4 +- .../testing/snapshot-test/scripts/node-3.env | 10 ++ fendermint/testing/snapshot-test/src/lib.rs | 9 +- 6 files changed, 72 insertions(+), 60 deletions(-) create mode 100644 fendermint/testing/snapshot-test/scripts/node-3.env diff --git a/fendermint/testing/smoke-test/Makefile.toml b/fendermint/testing/smoke-test/Makefile.toml index 835806f1..236d0690 100644 --- a/fendermint/testing/smoke-test/Makefile.toml +++ b/fendermint/testing/smoke-test/Makefile.toml @@ -20,14 +20,6 @@ env_files = [ { path = "../Makefile/ci.env", profile = "ci" }, ] -# Overriding the env file to enable state pruning. -[tasks.test-data-env] -script = """ -cat << EOF > ${TEST_DATA_DIR}/.env -FM_DB__STATE_HIST_SIZE=30 -EOF -""" - [tasks.test] clear = true dependencies = ["simplecoin-example", "ethapi-example"] diff --git a/fendermint/testing/snapshot-test/Makefile.toml b/fendermint/testing/snapshot-test/Makefile.toml index 8bb0bac3..d31fc69b 100644 --- a/fendermint/testing/snapshot-test/Makefile.toml +++ b/fendermint/testing/snapshot-test/Makefile.toml @@ -1,11 +1,4 @@ -# cd fendermint/testing/snapshot-test -# - then - -# cargo make -# - or - -# cargo make setup -# cargo make test -# docker logs snapshot-fendermint -# cargo make teardown +# See fendermint/testing/snapshot-test/src/lib.rs for description. extend = [ { path = "../Makefile/common.toml" }, @@ -22,16 +15,17 @@ env_files = [ # This one is applied on every *container*. # The other env files are for `cargo make` itself, # the values are only available inside TOML files. +# Disabling PEX so nodes only connect to what they are told about. [tasks.test-data-env] script = """ cat << EOF > ${TEST_DATA_DIR}/.env FM_SNAPSHOTS__ENABLED=true FM_SNAPSHOTS__BLOCK_INTERVAL=10 -FM_SNAPSHOTS__HIST_SIZE=10 +FM_SNAPSHOTS__HIST_SIZE=6 FM_SNAPSHOTS__CHUNK_SIZE_BYTES=1048576 FM_SNAPSHOTS__SYNC_POLL_INTERVAL=10 -CMT_P2P_PEX=true -CMT_P2P_MAX_NUM_OUTBOUND_PEERS=2 +CMT_P2P_PEX=false +CMT_P2P_MAX_NUM_OUTBOUND_PEERS=3 CMT_STATESYNC__ENABLE=true EOF """ @@ -46,6 +40,8 @@ run_task = { name = [ "node-1-sync-test", "node-2-setup", "node-2-sync-test", + "node-3-setup", + "node-3-sync-test", ], fork = true, cleanup_task = "snapshot-teardown" } # Wait enough time that some snapshots should be exported. @@ -68,26 +64,12 @@ fi run_task = { name = [ "node-1-teardown", "node-2-teardown", + "node-3-teardown", ] } # ### General tasks for node-1 and node-2 -# Set the persistent peer address to that of the default node-0. -[tasks.node-set-seed] -script = """ -CMT_SEED_ID=$(cat $BASE_DIR/$SEED_NODE_NAME/node-id) -CMT_P2P_PERSISTENT_PEERS=$CMT_SEED_ID@$SEED_CMT_CONTAINER_NAME:26656 -sed -i'' -e "s|persistent_peers = \\"\\"|persistent_peers = \\"$CMT_P2P_PERSISTENT_PEERS\\"|" $BASE_DIR/${NODE_NAME}/cometbft/config/config.toml -""" - -# Get the genesis from node-0 -[tasks.node-copy-genesis] -script = """ -cp $BASE_DIR/${SEED_NODE_NAME}/cometbft/config/genesis.json \ - $BASE_DIR/${NODE_NAME}/cometbft/config/genesis.json -""" - [tasks.node-setup] # Export node-0 ID. dependencies = ["cometbft-export-node-id"] @@ -100,10 +82,26 @@ run_task = { name = [ "fendermint-start", "cometbft-start", "cometbft-wait", + "cometbft-export-node-id", "fendermint-logs", "cometbft-logs", ] } +# Set the persistent peer address to that of the default node-0. +[tasks.node-set-seed] +script = """ +CMT_SEED_ID=$(cat $BASE_DIR/$SEED_NODE_NAME/node-id) +CMT_P2P_PERSISTENT_PEERS=$CMT_SEED_ID@$SEED_CMT_CONTAINER_NAME:26656 +sed -i'' -e "s|persistent_peers = \\"\\"|persistent_peers = \\"$CMT_P2P_PERSISTENT_PEERS\\"|" $BASE_DIR/${NODE_NAME}/cometbft/config/config.toml +""" + +# Get the genesis from node-0 +[tasks.node-copy-genesis] +script = """ +cp $BASE_DIR/${SEED_NODE_NAME}/cometbft/config/genesis.json \ + $BASE_DIR/${NODE_NAME}/cometbft/config/genesis.json +""" + [tasks.node-teardown] run_task = { name = [ "cometbft-destroy", @@ -123,27 +121,29 @@ if [ "$EARLIEST" = "$LATEST" ]; then fi """ -# Tell node-2 to statesync from node-0 or node-1 +# Tell node-3 to statesync from node-1 and node-2 # Setting the trusted height to 10 and fetching its block hash (which appears in the header @ 11), # so that it can do whatever it wants after that. -# Not telling any of the nodes to prune their history with `FM_DB__STATE_HIST_SIZE=100` for example, -# so that we can repeat this test any time (node-1 needs to sync from genesis of node-0) and also -# so that we can check that node-2 will not have a full history of blocks, which should only be the -# case if it synced from a snapshot; if it was also told to prune the data then that test would be -# inconclusive. -# Alternatively we could set up 4 nodes: node-0 no pruning, node-1 and node-2 boot from node-0 and prune, node-3 boot from node-1 and node-2, -# however it's unclear whether node-3 would discover node-0 through PEX and boot from there, and also the setting of a trusted height would shift. +# Tell node-1 and node-2 to prune their states so node-3 (who only knows aobut node-1) has no chance +# but to use snapshots to sync itself. [tasks.node-env] script = """ cat ${TEST_DATA_DIR}/.env > ${TEST_DATA_DIR}/${NODE_NAME}/.env -if [ $NODE_NAME = "node-2" ]; then +cat << EOL >> ${TEST_DATA_DIR}/${NODE_NAME}/.env +FM_DB__STATE_HIST_SIZE=90 +EOL + +if [ $NODE_NAME = "node-3" ]; then -TRUST_HASH=$(curl -s "http://localhost:26657/header?height=11" | jq -r ".result.header.last_block_id.hash") +LATEST_HEIGHT=$(curl -s http://localhost:26657/commit | jq -r ".result.signed_header.header.height") +TRUST_HEIGHT=$(($LATEST_HEIGHT-30)) +QUERY_HEIGHT=$(($TRUST_HEIGHT+1)) +TRUST_HASH=$(curl -s "http://localhost:26657/header?height=$QUERY_HEIGHT" | jq -r ".result.header.last_block_id.hash") cat << EOL >> ${TEST_DATA_DIR}/${NODE_NAME}/.env -CMT_STATESYNC_RPC_SERVERS="http://snapshot-cometbft:26657,http://snapshot-cometbft-1:26657" -CMT_STATESYNC_TRUST_HEIGHT=10 +CMT_STATESYNC_RPC_SERVERS="http://snapshot-cometbft-1:26657,http://snapshot-cometbft-2:26657" +CMT_STATESYNC_TRUST_HEIGHT=$TRUST_HEIGHT CMT_STATESYNC_TRUST_HASH=$TRUST_HASH EOL fi @@ -151,10 +151,6 @@ fi # ### node-1 tasks -[tasks.node-1-env] -env_files = [{ path = "./scripts/node-1.env" }] -extend = "node-env" - [tasks.node-1-setup] env_files = [{ path = "./scripts/node-1.env" }] extend = "node-setup" @@ -169,8 +165,6 @@ extend = "node-sync-test" # ### node-2 tasks -# TODO: Get a trusted height and block hash from node-0, something that has a snapshot. -# TODO: Test that node-2 did not sync from genesis. [tasks.node-2-setup] env_files = [{ path = "./scripts/node-2.env" }] extend = "node-setup" @@ -182,3 +176,18 @@ extend = "node-teardown" [tasks.node-2-sync-test] env_files = [{ path = "./scripts/node-2.env" }] extend = "node-sync-test" + + +# ### node-3 tasks + +[tasks.node-3-setup] +env_files = [{ path = "./scripts/node-3.env" }] +extend = "node-setup" + +[tasks.node-3-teardown] +env_files = [{ path = "./scripts/node-3.env" }] +extend = "node-teardown" + +[tasks.node-3-sync-test] +env_files = [{ path = "./scripts/node-3.env" }] +extend = "node-sync-test" diff --git a/fendermint/testing/snapshot-test/scripts/node-1.env b/fendermint/testing/snapshot-test/scripts/node-1.env index 7cd31b90..1fcb7f70 100644 --- a/fendermint/testing/snapshot-test/scripts/node-1.env +++ b/fendermint/testing/snapshot-test/scripts/node-1.env @@ -5,6 +5,6 @@ ENV_FILE=${TEST_DATA_DIR}/${NODE_NAME}/.env FM_CONTAINER_NAME=snapshot-fendermint-1 CMT_CONTAINER_NAME=snapshot-cometbft-1 CMT_DIR=${TEST_DATA_DIR}/${NODE_NAME}/cometbft -CMT_P2P_HOST_PORT=26666 -CMT_RPC_HOST_PORT=26667 +CMT_P2P_HOST_PORT=26156 +CMT_RPC_HOST_PORT=26157 CMT_WAIT_MILLIS=20000 diff --git a/fendermint/testing/snapshot-test/scripts/node-2.env b/fendermint/testing/snapshot-test/scripts/node-2.env index b4fd4db5..7380f715 100644 --- a/fendermint/testing/snapshot-test/scripts/node-2.env +++ b/fendermint/testing/snapshot-test/scripts/node-2.env @@ -5,6 +5,6 @@ ENV_FILE=${TEST_DATA_DIR}/${NODE_NAME}/.env FM_CONTAINER_NAME=snapshot-fendermint-2 CMT_CONTAINER_NAME=snapshot-cometbft-2 CMT_DIR=${TEST_DATA_DIR}/${NODE_NAME}/cometbft -CMT_P2P_HOST_PORT=26676 -CMT_RPC_HOST_PORT=26677 +CMT_P2P_HOST_PORT=26256 +CMT_RPC_HOST_PORT=26257 CMT_WAIT_MILLIS=20000 diff --git a/fendermint/testing/snapshot-test/scripts/node-3.env b/fendermint/testing/snapshot-test/scripts/node-3.env new file mode 100644 index 00000000..62f8705b --- /dev/null +++ b/fendermint/testing/snapshot-test/scripts/node-3.env @@ -0,0 +1,10 @@ +SEED_NODE_NAME=node-1 +SEED_CMT_CONTAINER_NAME=snapshot-cometbft-1 +NODE_NAME=node-3 +ENV_FILE=${TEST_DATA_DIR}/${NODE_NAME}/.env +FM_CONTAINER_NAME=snapshot-fendermint-3 +CMT_CONTAINER_NAME=snapshot-cometbft-3 +CMT_DIR=${TEST_DATA_DIR}/${NODE_NAME}/cometbft +CMT_P2P_HOST_PORT=26356 +CMT_RPC_HOST_PORT=26357 +CMT_WAIT_MILLIS=20000 diff --git a/fendermint/testing/snapshot-test/src/lib.rs b/fendermint/testing/snapshot-test/src/lib.rs index e8486754..a4d2166b 100644 --- a/fendermint/testing/snapshot-test/src/lib.rs +++ b/fendermint/testing/snapshot-test/src/lib.rs @@ -2,10 +2,11 @@ // SPDX-License-Identifier: Apache-2.0, MIT //! Run tests against multiple Fendermint+CometBFT docker container pairs locally: //! 0. The default `snapshot-fendermint` and `snapshot-cometbft` pair -//! 1. A `snapshot-fendermint-1` and `snapshot-cometbft-1` using `scripts/node-1.env`, -//! which sync with the default node from genesis on a block-by-block basis. -//! 2. A `snapshot-fendermint-2` and `snapshot-cometbft-2` using `scripts/node-2.env`, -//! which syncs with `node-0` and `node-1` using snapshots (a.k.a. state sync). +//! 1. A `snapshot-cometbft-1` and `snapshot-cometbft-2`, using `scripts/node-1.env` and `node-2`.env, +//! syncing with the default node from genesis on a block-by-block basis, and clear out their history +//! to force others who sync with them to use snapshots. +//! 2. A `snapshot-cometbft-3` using `scripts/node-3.env`, +//! which syncs with `node-1` and `node-2` using snapshots (a.k.a. state sync). //! //! Note that CometBFT state sync requires 2 RPC servers, which is why we need 3 nodes. //! From 49f264ab617f36a8f8369d02806ecab6e9fed58e Mon Sep 17 00:00:00 2001 From: Akosh Farkash Date: Tue, 12 Dec 2023 21:10:50 +0000 Subject: [PATCH 09/18] FM-363: Fix filename interpolation --- fendermint/vm/snapshot/src/state.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fendermint/vm/snapshot/src/state.rs b/fendermint/vm/snapshot/src/state.rs index 6b0d944a..46173149 100644 --- a/fendermint/vm/snapshot/src/state.rs +++ b/fendermint/vm/snapshot/src/state.rs @@ -67,7 +67,7 @@ impl SnapshotItem { self.manifest.chunks ); } - let chunk_file = self.snapshot_dir.join("{chunk}.part"); + let chunk_file = self.snapshot_dir.join(format!("{chunk}.part")); let content = std::fs::read(&chunk_file) .with_context(|| format!("failed to read chunk {}", chunk_file.to_string_lossy()))?; From dafdddb40af0a82f20f91f24bef855a5dc842505 Mon Sep 17 00:00:00 2001 From: Akosh Farkash Date: Tue, 12 Dec 2023 21:10:58 +0000 Subject: [PATCH 10/18] FM-363: More logging --- fendermint/app/src/app.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fendermint/app/src/app.rs b/fendermint/app/src/app.rs index a9eeedfa..15ca2bfd 100644 --- a/fendermint/app/src/app.rs +++ b/fendermint/app/src/app.rs @@ -848,8 +848,10 @@ where async fn list_snapshots(&self) -> AbciResult { if let Some(ref client) = self.snapshots { let snapshots = atomically(|| client.list_snapshots()).await; + tracing::info!(snapshot_count = snapshots.len(), "listing snaphots"); Ok(to_snapshots(snapshots)?) } else { + tracing::info!("listing snaphots disabled"); Ok(Default::default()) } } @@ -887,6 +889,11 @@ where request: request::OfferSnapshot, ) -> AbciResult { if let Some(ref client) = self.snapshots { + tracing::info!( + height = request.snapshot.height.value(), + "received snapshot offer" + ); + match from_snapshot(request).context("failed to parse snapshot") { Ok(manifest) => { tracing::info!(?manifest, "received snapshot offer"); From d480d8c885f6a892af45691111c5bc4684bb3e0e Mon Sep 17 00:00:00 2001 From: Akosh Farkash Date: Tue, 12 Dec 2023 21:11:21 +0000 Subject: [PATCH 11/18] FM-363: Fix env vars --- fendermint/testing/snapshot-test/Makefile.toml | 7 ++++--- fendermint/testing/snapshot-test/src/lib.rs | 15 ++++++++++++++- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/fendermint/testing/snapshot-test/Makefile.toml b/fendermint/testing/snapshot-test/Makefile.toml index d31fc69b..9e9aea23 100644 --- a/fendermint/testing/snapshot-test/Makefile.toml +++ b/fendermint/testing/snapshot-test/Makefile.toml @@ -26,7 +26,6 @@ FM_SNAPSHOTS__CHUNK_SIZE_BYTES=1048576 FM_SNAPSHOTS__SYNC_POLL_INTERVAL=10 CMT_P2P_PEX=false CMT_P2P_MAX_NUM_OUTBOUND_PEERS=3 -CMT_STATESYNC__ENABLE=true EOF """ @@ -131,7 +130,7 @@ script = """ cat ${TEST_DATA_DIR}/.env > ${TEST_DATA_DIR}/${NODE_NAME}/.env cat << EOL >> ${TEST_DATA_DIR}/${NODE_NAME}/.env -FM_DB__STATE_HIST_SIZE=90 +FM_DB__STATE_HIST_SIZE=45 EOL if [ $NODE_NAME = "node-3" ]; then @@ -142,9 +141,11 @@ QUERY_HEIGHT=$(($TRUST_HEIGHT+1)) TRUST_HASH=$(curl -s "http://localhost:26657/header?height=$QUERY_HEIGHT" | jq -r ".result.header.last_block_id.hash") cat << EOL >> ${TEST_DATA_DIR}/${NODE_NAME}/.env -CMT_STATESYNC_RPC_SERVERS="http://snapshot-cometbft-1:26657,http://snapshot-cometbft-2:26657" +CMT_STATESYNC_ENABLE=true +CMT_STATESYNC_RPC_SERVERS=http://snapshot-cometbft-1:26657,http://snapshot-cometbft-2:26657 CMT_STATESYNC_TRUST_HEIGHT=$TRUST_HEIGHT CMT_STATESYNC_TRUST_HASH=$TRUST_HASH +CMT_STATESYNC_TEMP_DIR=/cometbft EOL fi """ diff --git a/fendermint/testing/snapshot-test/src/lib.rs b/fendermint/testing/snapshot-test/src/lib.rs index a4d2166b..83abf86a 100644 --- a/fendermint/testing/snapshot-test/src/lib.rs +++ b/fendermint/testing/snapshot-test/src/lib.rs @@ -12,11 +12,24 @@ //! //! See and //! -//! Example: +//! Examples: //! +//! 1. All in one go //! ```text //! cd fendermint/testing/snapshot-test //! cargo make //! ``` //! +//! 2. One by one +//! //! ```text +//! cd fendermint/testing/snapshot-test +//! cargo make setup +//! cargo make node-1 setup +//! cargo make node-2 setup +//! cargo make node-3 setup +//! docker logs snapshot-cometbft-3 +//! cargo make snapshot-teardown +//! cargo make teardown +//! ``` +//! //! Make sure you installed cargo-make by running `cargo install cargo-make` first. From 9fafb07a19e7c0258ec5fbeba0bba552ec5c8d16 Mon Sep 17 00:00:00 2001 From: Akosh Farkash Date: Tue, 12 Dec 2023 21:21:48 +0000 Subject: [PATCH 12/18] FM-363: Fix parts dir --- fendermint/vm/snapshot/src/state.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fendermint/vm/snapshot/src/state.rs b/fendermint/vm/snapshot/src/state.rs index 46173149..38605fae 100644 --- a/fendermint/vm/snapshot/src/state.rs +++ b/fendermint/vm/snapshot/src/state.rs @@ -57,6 +57,11 @@ impl SnapshotItem { last_access: SystemTime::UNIX_EPOCH, } } + + fn parts_dir(&self) -> PathBuf { + self.snapshot_dir.join(PARTS_DIR_NAME) + } + /// Load the data from disk. /// /// Returns an error if the chunk isn't within range or if the file doesn't exist any more. @@ -67,7 +72,7 @@ impl SnapshotItem { self.manifest.chunks ); } - let chunk_file = self.snapshot_dir.join(format!("{chunk}.part")); + let chunk_file = self.parts_dir().join(format!("{chunk}.part")); let content = std::fs::read(&chunk_file) .with_context(|| format!("failed to read chunk {}", chunk_file.to_string_lossy()))?; @@ -80,8 +85,8 @@ impl SnapshotItem { where BS: Blockstore + Send + 'static, { - let parts = manifest::list_parts(self.snapshot_dir.join(PARTS_DIR_NAME)) - .context("failed to list snapshot parts")?; + let parts = + manifest::list_parts(self.parts_dir()).context("failed to list snapshot parts")?; // 1. Restore the snapshots into a complete `snapshot.car` file. let car_path = self.snapshot_dir.join(SNAPSHOT_FILE_NAME); From 6479eb44b3a6d502cf7feb8e022e0cfc9b81f466 Mon Sep 17 00:00:00 2001 From: Akosh Farkash Date: Tue, 12 Dec 2023 23:06:06 +0000 Subject: [PATCH 13/18] FM-363: Specify download dir --- fendermint/app/settings/src/lib.rs | 8 ++++++++ fendermint/app/src/cmd/run.rs | 1 + .../testing/snapshot-test/Makefile.toml | 3 ++- fendermint/vm/snapshot/src/client.rs | 20 +++++++++---------- fendermint/vm/snapshot/src/manager.rs | 20 ++++++++++++------- fendermint/vm/snapshot/src/state.rs | 6 ++++++ 6 files changed, 40 insertions(+), 18 deletions(-) diff --git a/fendermint/app/settings/src/lib.rs b/fendermint/app/settings/src/lib.rs index 767b9354..d28afeb5 100644 --- a/fendermint/app/settings/src/lib.rs +++ b/fendermint/app/settings/src/lib.rs @@ -175,6 +175,14 @@ pub struct SnapshotSettings { /// How often to poll CometBFT to see whether it has caught up with the chain. #[serde_as(as = "DurationSeconds")] pub sync_poll_interval: Duration, + /// Temporary directory for downloads. + download_dir: Option, +} + +impl SnapshotSettings { + pub fn download_dir(&self) -> PathBuf { + self.download_dir.clone().unwrap_or(std::env::temp_dir()) + } } #[derive(Debug, Deserialize, Clone)] diff --git a/fendermint/app/src/cmd/run.rs b/fendermint/app/src/cmd/run.rs index 8638ce42..c804e042 100644 --- a/fendermint/app/src/cmd/run.rs +++ b/fendermint/app/src/cmd/run.rs @@ -188,6 +188,7 @@ async fn run(settings: Settings) -> anyhow::Result<()> { let (manager, client) = SnapshotManager::new( state_store.clone(), settings.snapshots_dir(), + settings.snapshots.download_dir(), settings.snapshots.block_interval, settings.snapshots.chunk_size_bytes, settings.snapshots.hist_size, diff --git a/fendermint/testing/snapshot-test/Makefile.toml b/fendermint/testing/snapshot-test/Makefile.toml index 9e9aea23..bb0f77c7 100644 --- a/fendermint/testing/snapshot-test/Makefile.toml +++ b/fendermint/testing/snapshot-test/Makefile.toml @@ -21,7 +21,7 @@ script = """ cat << EOF > ${TEST_DATA_DIR}/.env FM_SNAPSHOTS__ENABLED=true FM_SNAPSHOTS__BLOCK_INTERVAL=10 -FM_SNAPSHOTS__HIST_SIZE=6 +FM_SNAPSHOTS__HIST_SIZE=10 FM_SNAPSHOTS__CHUNK_SIZE_BYTES=1048576 FM_SNAPSHOTS__SYNC_POLL_INTERVAL=10 CMT_P2P_PEX=false @@ -146,6 +146,7 @@ CMT_STATESYNC_RPC_SERVERS=http://snapshot-cometbft-1:26657,http://snapshot-comet CMT_STATESYNC_TRUST_HEIGHT=$TRUST_HEIGHT CMT_STATESYNC_TRUST_HASH=$TRUST_HASH CMT_STATESYNC_TEMP_DIR=/cometbft +FM_SNAPSHOTS__DOWNLOAD_DIR=/data/${NODE_NAME}/fendermint/data EOL fi """ diff --git a/fendermint/vm/snapshot/src/client.rs b/fendermint/vm/snapshot/src/client.rs index 2d75093f..7df5ffc9 100644 --- a/fendermint/vm/snapshot/src/client.rs +++ b/fendermint/vm/snapshot/src/client.rs @@ -8,7 +8,6 @@ use fendermint_vm_interpreter::fvm::state::{ snapshot::{BlockHeight, SnapshotVersion}, FvmStateParams, }; -use tempfile::tempdir; use crate::{ manifest, @@ -19,14 +18,20 @@ use crate::{ /// Interface to snapshot state for the application. #[derive(Clone)] pub struct SnapshotClient { + download_dir: PathBuf, /// The client will only notify the manager of snapshottable heights. snapshot_interval: BlockHeight, state: SnapshotState, } impl SnapshotClient { - pub fn new(snapshot_interval: BlockHeight, state: SnapshotState) -> Self { + pub fn new( + download_dir: PathBuf, + snapshot_interval: BlockHeight, + state: SnapshotState, + ) -> Self { Self { + download_dir, snapshot_interval, state, } @@ -78,7 +83,7 @@ impl SnapshotClient { if manifest.version != 1 { abort(SnapshotError::IncompatibleVersion(manifest.version)) } else { - match tempdir() { + match tempfile::tempdir_in(&self.download_dir) { Ok(dir) => { // Create a `parts` sub-directory for the chunks. if let Err(e) = std::fs::create_dir(dir.path().join("parts")) { @@ -129,12 +134,7 @@ impl SnapshotClient { if index != next_index { abort(SnapshotError::UnexpectedChunk(next_index, index)) } else { - let part_path = cd - .download_dir - .as_ref() - .path() - .join("parts") - .join(format!("{}.part", index)); + let part_path = cd.parts_dir().join(format!("{}.part", index)); // We are doing IO inside the STM transaction, but that's okay because there is no contention on the download. match std::fs::write(part_path, contents) { @@ -144,7 +144,7 @@ impl SnapshotClient { if next_index == cd.manifest.chunks { // Verify the checksum then load the snapshot and remove the current download from memory. - match manifest::parts_checksum(cd.download_dir.as_ref()) { + match manifest::parts_checksum(cd.parts_dir()) { Ok(checksum) => { if checksum == cd.manifest.checksum { let item = SnapshotItem::new( diff --git a/fendermint/vm/snapshot/src/manager.rs b/fendermint/vm/snapshot/src/manager.rs index 3e3715fd..e91566b9 100644 --- a/fendermint/vm/snapshot/src/manager.rs +++ b/fendermint/vm/snapshot/src/manager.rs @@ -46,6 +46,7 @@ where pub fn new( store: BS, snapshots_dir: PathBuf, + download_dir: PathBuf, block_interval: BlockHeight, chunk_size: usize, hist_size: usize, @@ -71,7 +72,7 @@ where is_syncing: TVar::new(true), }; - let client = SnapshotClient::new(block_interval, state); + let client = SnapshotClient::new(download_dir, block_interval, state); Ok((manager, client)) } @@ -225,7 +226,9 @@ where // Create a checksum over the CAR file. let checksum_bytes = file_checksum(&snapshot_path).context("failed to compute checksum")?; - std::fs::write(&checksum_path, checksum_bytes).context("failed to write checksum file")?; + + std::fs::write(&checksum_path, checksum_bytes.to_string()) + .context("failed to write checksum file")?; // Create a directory for the parts. std::fs::create_dir(&parts_path).context("failed to create parts dir")?; @@ -343,7 +346,8 @@ mod tests { let (state_params, store) = init_genesis().await; // Now we have one store initialized with genesis, let's create a manager and snapshot it. - let temp_dir = tempfile::tempdir().expect("failed to create tmp dir"); + let snapshots_dir = tempfile::tempdir().expect("failed to create tmp dir"); + let download_dir = tempfile::tempdir().expect("failed to create tmp dir"); // Not polling because it's cumbersome to mock it. let never_poll_sync = Duration::ZERO; @@ -351,7 +355,8 @@ mod tests { let (snapshot_manager, snapshot_client) = SnapshotManager::new( store.clone(), - temp_dir.path().into(), + snapshots_dir.path().into(), + download_dir.path().into(), 1, 10000, 1, @@ -393,13 +398,13 @@ mod tests { assert_eq!(snapshot.manifest.state_params, state_params); assert_eq!( snapshot.snapshot_dir.as_path(), - temp_dir.path().join("snapshot-0") + snapshots_dir.path().join("snapshot-0") ); let _ = std::fs::File::open(snapshot.snapshot_dir.join("manifest.json")) .expect("manifests file exists"); - let snapshots = manifest::list_manifests(temp_dir.path()).unwrap(); + let snapshots = manifest::list_manifests(snapshots_dir.path()).unwrap(); assert_eq!(snapshots.len(), 1, "can list manifests"); assert_eq!(snapshots[0], snapshot); @@ -416,7 +421,8 @@ mod tests { // Create a new manager instance let (_, new_client) = SnapshotManager::new( store, - temp_dir.path().into(), + snapshots_dir.path().into(), + download_dir.path().into(), 1, 10000, 1, diff --git a/fendermint/vm/snapshot/src/state.rs b/fendermint/vm/snapshot/src/state.rs index 38605fae..016201c6 100644 --- a/fendermint/vm/snapshot/src/state.rs +++ b/fendermint/vm/snapshot/src/state.rs @@ -171,6 +171,12 @@ pub struct SnapshotDownload { pub next_index: TVar, } +impl SnapshotDownload { + pub fn parts_dir(&self) -> PathBuf { + self.download_dir.path().join(PARTS_DIR_NAME) + } +} + #[cfg(feature = "arb")] mod arb { use std::{path::PathBuf, time::SystemTime}; From 9e09230267b7c5dca5149832b211731070834c96 Mon Sep 17 00:00:00 2001 From: Akosh Farkash Date: Tue, 12 Dec 2023 23:21:18 +0000 Subject: [PATCH 14/18] FM-363: Preserver longer history to support sync --- fendermint/app/src/app.rs | 10 ++++++---- fendermint/testing/snapshot-test/Makefile.toml | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fendermint/app/src/app.rs b/fendermint/app/src/app.rs index 15ca2bfd..92856a40 100644 --- a/fendermint/app/src/app.rs +++ b/fendermint/app/src/app.rs @@ -690,16 +690,18 @@ where tendermint::Hash::None => return Err(anyhow!("empty block hash").into()), }; + let db = self.state_store_clone(); + let state = self.committed_state()?; + let mut state_params = state.state_params.clone(); + tracing::debug!( height = block_height, + timestamp = request.header.time.unix_timestamp(), app_hash = request.header.app_hash.to_string(), + //app_state_hash = to_app_hash(&state_params).to_string(), // should be the same as `app_hash` "begin block" ); - let db = self.state_store_clone(); - let state = self.committed_state()?; - let mut state_params = state.state_params.clone(); - // Notify the snapshotter. We don't do this in `commit` because *this* is the height at which // this state has been officially associated with the application hash, which is something // we will receive in `offer_snapshot` and we can compare. If we did it in `commit` we'd diff --git a/fendermint/testing/snapshot-test/Makefile.toml b/fendermint/testing/snapshot-test/Makefile.toml index bb0f77c7..16f9b003 100644 --- a/fendermint/testing/snapshot-test/Makefile.toml +++ b/fendermint/testing/snapshot-test/Makefile.toml @@ -130,7 +130,7 @@ script = """ cat ${TEST_DATA_DIR}/.env > ${TEST_DATA_DIR}/${NODE_NAME}/.env cat << EOL >> ${TEST_DATA_DIR}/${NODE_NAME}/.env -FM_DB__STATE_HIST_SIZE=45 +FM_DB__STATE_HIST_SIZE=100 EOL if [ $NODE_NAME = "node-3" ]; then From 5d00e7afa55482703b11b810c212da8398501f7c Mon Sep 17 00:00:00 2001 From: Akosh Farkash Date: Tue, 12 Dec 2023 23:47:28 +0000 Subject: [PATCH 15/18] FM-363: SnapshotParams --- fendermint/app/src/cmd/run.rs | 18 +++--- fendermint/vm/snapshot/src/lib.rs | 2 +- fendermint/vm/snapshot/src/manager.rs | 90 ++++++++++++++------------- 3 files changed, 59 insertions(+), 51 deletions(-) diff --git a/fendermint/app/src/cmd/run.rs b/fendermint/app/src/cmd/run.rs index c804e042..c0253ae8 100644 --- a/fendermint/app/src/cmd/run.rs +++ b/fendermint/app/src/cmd/run.rs @@ -15,7 +15,7 @@ use fendermint_vm_interpreter::{ signed::SignedMessageInterpreter, }; use fendermint_vm_resolver::ipld::IpldResolver; -use fendermint_vm_snapshot::SnapshotManager; +use fendermint_vm_snapshot::{SnapshotManager, SnapshotParams}; use fendermint_vm_topdown::proxy::IPCProviderProxy; use fendermint_vm_topdown::sync::launch_polling_syncer; use fendermint_vm_topdown::{CachedFinalityProvider, Toggle}; @@ -187,13 +187,15 @@ async fn run(settings: Settings) -> anyhow::Result<()> { let snapshots = if settings.snapshots.enabled { let (manager, client) = SnapshotManager::new( state_store.clone(), - settings.snapshots_dir(), - settings.snapshots.download_dir(), - settings.snapshots.block_interval, - settings.snapshots.chunk_size_bytes, - settings.snapshots.hist_size, - settings.snapshots.last_access_hold, - settings.snapshots.sync_poll_interval, + SnapshotParams { + snapshots_dir: settings.snapshots_dir(), + download_dir: settings.snapshots.download_dir(), + block_interval: settings.snapshots.block_interval, + chunk_size: settings.snapshots.chunk_size_bytes, + hist_size: settings.snapshots.hist_size, + last_access_hold: settings.snapshots.last_access_hold, + sync_poll_interval: settings.snapshots.sync_poll_interval, + }, ) .context("failed to create snapshot manager")?; diff --git a/fendermint/vm/snapshot/src/lib.rs b/fendermint/vm/snapshot/src/lib.rs index cd9e6b5c..aad78c79 100644 --- a/fendermint/vm/snapshot/src/lib.rs +++ b/fendermint/vm/snapshot/src/lib.rs @@ -18,6 +18,6 @@ const PARTS_DIR_NAME: &str = "parts"; pub use client::SnapshotClient; pub use error::SnapshotError; -pub use manager::SnapshotManager; +pub use manager::{SnapshotManager, SnapshotParams}; pub use manifest::SnapshotManifest; pub use state::SnapshotItem; diff --git a/fendermint/vm/snapshot/src/manager.rs b/fendermint/vm/snapshot/src/manager.rs index e91566b9..89f3b5c6 100644 --- a/fendermint/vm/snapshot/src/manager.rs +++ b/fendermint/vm/snapshot/src/manager.rs @@ -14,22 +14,31 @@ use fendermint_vm_interpreter::fvm::state::FvmStateParams; use fvm_ipld_blockstore::Blockstore; use tendermint_rpc::Client; -/// Create snapshots at regular block intervals. -pub struct SnapshotManager { - /// Blockstore - store: BS, +pub struct SnapshotParams { /// Location to store completed snapshots. - snapshots_dir: PathBuf, + pub snapshots_dir: PathBuf, + pub download_dir: PathBuf, + pub block_interval: BlockHeight, /// Target size in bytes for snapshot chunks. - chunk_size: usize, + pub chunk_size: usize, /// Number of snapshots to keep. /// /// 0 means unlimited. - hist_size: usize, + pub hist_size: usize, /// Time to hold on from purging a snapshot after a remote client /// asked for a chunk from it. - last_access_hold: Duration, + pub last_access_hold: Duration, /// How often to check CometBFT whether it has finished syncing. + pub sync_poll_interval: Duration, +} + +/// Create snapshots at regular block intervals. +pub struct SnapshotManager { + store: BS, + snapshots_dir: PathBuf, + chunk_size: usize, + hist_size: usize, + last_access_hold: Duration, sync_poll_interval: Duration, /// Shared state of snapshots. state: SnapshotState, @@ -43,36 +52,29 @@ where BS: Blockstore + Clone + Send + Sync + 'static, { /// Create a new manager. - pub fn new( - store: BS, - snapshots_dir: PathBuf, - download_dir: PathBuf, - block_interval: BlockHeight, - chunk_size: usize, - hist_size: usize, - last_access_hold: Duration, - sync_poll_interval: Duration, - ) -> anyhow::Result<(Self, SnapshotClient)> { + pub fn new(store: BS, params: SnapshotParams) -> anyhow::Result<(Self, SnapshotClient)> { // Make sure the target directory exists. - std::fs::create_dir_all(&snapshots_dir).context("failed to create snapshots directory")?; + std::fs::create_dir_all(¶ms.snapshots_dir) + .context("failed to create snapshots directory")?; - let snapshot_items = list_manifests(&snapshots_dir).context("failed to list manifests")?; + let snapshot_items = + list_manifests(¶ms.snapshots_dir).context("failed to list manifests")?; let state = SnapshotState::new(snapshot_items); - let manager = Self { + let manager: SnapshotManager = Self { store, - snapshots_dir, - chunk_size, - hist_size, - last_access_hold, - sync_poll_interval, + snapshots_dir: params.snapshots_dir, + chunk_size: params.chunk_size, + hist_size: params.hist_size, + last_access_hold: params.last_access_hold, + sync_poll_interval: params.sync_poll_interval, state: state.clone(), // Assume we are syncing until we can determine otherwise. is_syncing: TVar::new(true), }; - let client = SnapshotClient::new(download_dir, block_interval, state); + let client = SnapshotClient::new(params.download_dir, params.block_interval, state); Ok((manager, client)) } @@ -323,7 +325,7 @@ mod tests { use fvm::engine::MultiEngine; use quickcheck::Arbitrary; - use crate::{manifest, PARTS_DIR_NAME}; + use crate::{manager::SnapshotParams, manifest, PARTS_DIR_NAME}; use super::SnapshotManager; @@ -355,13 +357,15 @@ mod tests { let (snapshot_manager, snapshot_client) = SnapshotManager::new( store.clone(), - snapshots_dir.path().into(), - download_dir.path().into(), - 1, - 10000, - 1, - Duration::ZERO, - never_poll_sync, + SnapshotParams { + snapshots_dir: snapshots_dir.path().into(), + download_dir: download_dir.path().into(), + block_interval: 1, + chunk_size: 10000, + hist_size: 1, + last_access_hold: Duration::ZERO, + sync_poll_interval: never_poll_sync, + }, ) .expect("failed to create snapshot manager"); @@ -421,13 +425,15 @@ mod tests { // Create a new manager instance let (_, new_client) = SnapshotManager::new( store, - snapshots_dir.path().into(), - download_dir.path().into(), - 1, - 10000, - 1, - Duration::ZERO, - never_poll_sync, + SnapshotParams { + snapshots_dir: snapshots_dir.path().into(), + download_dir: download_dir.path().into(), + block_interval: 1, + chunk_size: 10000, + hist_size: 1, + last_access_hold: Duration::ZERO, + sync_poll_interval: never_poll_sync, + }, ) .expect("failed to create snapshot manager"); From a2e6f10ff4823607bda02dd1b339f0a5f4be7e88 Mon Sep 17 00:00:00 2001 From: Akosh Farkash Date: Tue, 12 Dec 2023 23:55:24 +0000 Subject: [PATCH 16/18] FM-363: Remove 'parts' --- fendermint/vm/snapshot/src/client.rs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/fendermint/vm/snapshot/src/client.rs b/fendermint/vm/snapshot/src/client.rs index 7df5ffc9..2b9471c0 100644 --- a/fendermint/vm/snapshot/src/client.rs +++ b/fendermint/vm/snapshot/src/client.rs @@ -85,11 +85,6 @@ impl SnapshotClient { } else { match tempfile::tempdir_in(&self.download_dir) { Ok(dir) => { - // Create a `parts` sub-directory for the chunks. - if let Err(e) = std::fs::create_dir(dir.path().join("parts")) { - return abort(SnapshotError::from(e)); - }; - // Save the manifest into the temp directory; // that way we can always see on the file system what's happening. let json = match serde_json::to_string_pretty(&manifest) @@ -98,17 +93,23 @@ impl SnapshotClient { Ok(json) => json, Err(e) => return abort(SnapshotError::from(e)), }; - if let Err(e) = std::fs::write(dir.path().join(MANIFEST_FILE_NAME), json) { - return abort(SnapshotError::from(e)); - } - let download_path = dir.path().into(); + let download_path: PathBuf = dir.path().into(); let download = SnapshotDownload { manifest, download_dir: Arc::new(dir), next_index: TVar::new(0), }; + // Create a `parts` sub-directory for the chunks. + if let Err(e) = std::fs::create_dir(download.parts_dir()) { + return abort(SnapshotError::from(e)); + }; + + if let Err(e) = std::fs::write(download_path.join(MANIFEST_FILE_NAME), json) { + return abort(SnapshotError::from(e)); + } + self.state.current_download.write(Some(download))?; Ok(download_path) From c8a3cae40d7bb801c55810465c1da8fcc4f166c9 Mon Sep 17 00:00:00 2001 From: Akosh Farkash Date: Wed, 13 Dec 2023 12:40:37 +0000 Subject: [PATCH 17/18] FM-363: Tweak timings --- fendermint/testing/snapshot-test/Makefile.toml | 6 ++++-- fendermint/testing/snapshot-test/src/lib.rs | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fendermint/testing/snapshot-test/Makefile.toml b/fendermint/testing/snapshot-test/Makefile.toml index 16f9b003..3acb916c 100644 --- a/fendermint/testing/snapshot-test/Makefile.toml +++ b/fendermint/testing/snapshot-test/Makefile.toml @@ -26,6 +26,7 @@ FM_SNAPSHOTS__CHUNK_SIZE_BYTES=1048576 FM_SNAPSHOTS__SYNC_POLL_INTERVAL=10 CMT_P2P_PEX=false CMT_P2P_MAX_NUM_OUTBOUND_PEERS=3 +CMT_CONSENSUS_TIMEOUT_COMMIT=1s EOF """ @@ -33,12 +34,12 @@ EOF [tasks.test] clear = true run_task = { name = [ - "snapshot-wait", - "snapshot-created", "node-1-setup", "node-1-sync-test", "node-2-setup", "node-2-sync-test", + "snapshot-wait", + "snapshot-created", "node-3-setup", "node-3-sync-test", ], fork = true, cleanup_task = "snapshot-teardown" } @@ -146,6 +147,7 @@ CMT_STATESYNC_RPC_SERVERS=http://snapshot-cometbft-1:26657,http://snapshot-comet CMT_STATESYNC_TRUST_HEIGHT=$TRUST_HEIGHT CMT_STATESYNC_TRUST_HASH=$TRUST_HASH CMT_STATESYNC_TEMP_DIR=/cometbft +CMT_STATESYNC_DISCOVERY_TIME=5s FM_SNAPSHOTS__DOWNLOAD_DIR=/data/${NODE_NAME}/fendermint/data EOL fi diff --git a/fendermint/testing/snapshot-test/src/lib.rs b/fendermint/testing/snapshot-test/src/lib.rs index 83abf86a..6b754bc7 100644 --- a/fendermint/testing/snapshot-test/src/lib.rs +++ b/fendermint/testing/snapshot-test/src/lib.rs @@ -21,7 +21,7 @@ //! ``` //! //! 2. One by one -//! //! ```text +//! ```text //! cd fendermint/testing/snapshot-test //! cargo make setup //! cargo make node-1 setup From 999b2e1ee4d7e507530f8a40657a6b7ebdae64ec Mon Sep 17 00:00:00 2001 From: Akosh Farkash Date: Wed, 13 Dec 2023 12:40:51 +0000 Subject: [PATCH 18/18] FM-363: Fix snapshot height --- fendermint/app/src/app.rs | 29 ++++++++++++++++++++--------- fendermint/app/src/tmconv.rs | 6 +++++- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/fendermint/app/src/app.rs b/fendermint/app/src/app.rs index 92856a40..0aacaa42 100644 --- a/fendermint/app/src/app.rs +++ b/fendermint/app/src/app.rs @@ -702,15 +702,6 @@ where "begin block" ); - // Notify the snapshotter. We don't do this in `commit` because *this* is the height at which - // this state has been officially associated with the application hash, which is something - // we will receive in `offer_snapshot` and we can compare. If we did it in `commit` we'd - // have to associate the snapshot with `block_height + 1`. But this way we also know that - // others have agreed with our results. - if let Some(ref snapshots) = self.snapshots { - atomically(|| snapshots.notify(block_height as u64, state_params.clone())).await; - } - state_params.timestamp = to_timestamp(request.header.time); let state = FvmExecState::new(db, self.multi_engine.as_ref(), block_height, state_params) @@ -833,6 +824,24 @@ where // notified about), we could add it to the `ChainMessageInterpreter` as a constructor argument, // a sort of "ambient state", and not worry about in in the `App` at all. + // Notify the snapshotter. It wasn't clear whether this should be done in `commit` or `begin_block`, + // that is, whether the _height_ of the snapshot should be `block_height` or `block_height+1`. + // When CometBFT calls `offer_snapshot` it sends an `app_hash` in it that we compare to the CID + // of the `state_params`. Based on end-to-end testing it looks like it gives the `app_hash` from + // the *next* block, so we have to do it here. + // For example: + // a) Notify in `begin_block`: say we are at committing block 899, then we notify in `begin_block` + // that block 900 has this state (so we use `block_height+1` in notification); + // CometBFT is going to offer it with the `app_hash` of block 901, which won't match, because + // by then the timestamp will be different in the state params after committing block 900. + // b) Notify in `commit`: say we are committing block 900 and notify immediately that it has this state + // (even though this state will only be available to query from the next height); + // CometBFT is going to offer it with the `app_hash` of 901, but in this case that's good, because + // that hash reflects the changes made by block 900, which this state param is the result of. + if let Some(ref snapshots) = self.snapshots { + atomically(|| snapshots.notify(block_height, state.state_params.clone())).await; + } + // Commit app state to the datastore. self.set_committed_state(state)?; @@ -969,6 +978,8 @@ where // Now insert the new state into the history. let mut state = self.committed_state()?; + + // The height reflects that it was produced in `commit`. state.block_height = snapshot.manifest.block_height; state.state_params = snapshot.manifest.state_params; self.set_committed_state(state)?; diff --git a/fendermint/app/src/tmconv.rs b/fendermint/app/src/tmconv.rs index a09b45bf..4ef0a605 100644 --- a/fendermint/app/src/tmconv.rs +++ b/fendermint/app/src/tmconv.rs @@ -400,7 +400,11 @@ pub fn from_snapshot( let app_hash = to_app_hash(&metadata.state_params); if app_hash != offer.app_hash { - bail!("the application hash does not match the metadata"); + bail!( + "the application hash does not match the metadata; from-meta = {}, from-offer = {}", + app_hash, + offer.app_hash, + ); } let checksum = tendermint::hash::Hash::try_from(offer.snapshot.hash)