Validator historical state restoration (#922)

* adds a new overload of queryPastEvents allowing to query past events based on timestamp in the past * adds state restoration to validator * refactors a bit to get the tests back to work * replaces deprecated generic methods from Market with methods for specific event types * Refactors binary search * adds market tests for querying past SlotFilled events and binary search * Takes into account that <<earliest>> block available is not necessarily the genesis block * Adds more logging and makes testing earliest block boundary more reliable * adds validation tests for historical state restoration * adds mockprovider to simplify and improve testing of the edge conditions * adds slot reservation to the new tests after rebasing * adds validation groups and group index in logs of validator * adds integration test with two validators * adds comment on how to enable logging in integration test executable itself * testIntegration: makes list is running nodes injected and available in the body of the test * validation: adds integration test for historical state * adds more logging to validator * integration test: validator only looks 30 days back for historical state * adds logging of the slotState when removing slots during validation * review and refactor validator integration tests * adds validation to the set of integration tests * Fixes mistyped name of the mock provider module in testMarket * Fixes a typo in the name of the validation suite in integration tests * Makes validation unit test a bit easier to follow * better use of logScopes to reduce duplication * improves timing and clarifies the test conditions * uses http as default RPC provider for nodes running in integration tests as a workaround for dropped subscriptions * simplifies the validation integration tests by waiting for failed request instead of tracking slots * adds config option allowing selectively to set different provider url * Brings back the default settings for RPC provider in integration tests * use http RPC provider for clients in validation integration tests * fine-tune the tests * Makes validator integration test more robust - adds extra tracking * brings tracking of marketplace event back to validator integration test * refactors integration tests * deletes tmp file * adds <<return>> after forcing integration test to fail preliminarily * re-enables all integration tests and matrix * stops debug output in CI * allows to choose a different RPC provider for a given integration test suite * fixes signature of <<getBlock>> method in mockProvider * adds missing import which seem to be braking integration tests on windows * makes sure that clients, SPs, and validators use the same provider url * makes validator integration tests using http at 127.0.0.1:8545 * testvalidator: stop resubscribing as we are now using http polling as rpc provider * applying review comments * groups queryPastStorage overrides together (review comment) * groups the historical validation tests into a sub suite * removes the temporary extensions in marketplacesuite and multinodesuite allowing to specify provider url * simplifies validation integration tests * Removes debug logs when waiting for request to fail * Renaming waitForRequestFailed => waitForRequestToFail * renames blockNumberForBlocksAgo to pastBlockTag and makes it private * removes redundant debugging logs * refines logging in validation * removes dev logging from mockmarket * improves exception handling in provider helper procs and prepares for extraction to a separate module * Uses chronos instead of std/times for Duration * extracts provider and binary search helpers to a separate module * removes redundant log entry params from validator * unifies the notation to consistently use method call syntax * reuses ProviderError from nim-ethers in the provider extension * clarifies the comment in multinodesuite * uses == operator to check the predefined tags and raises exception when `BlockTag.pending` is requested. * when waiting for request to fail, we break on any request state that is not Started * removes tests that were moved to testProvider from testMarket * extracts tests that use MockProvider to a separate async suite * improves performance of the historical state restoration * removing redundant log messages in validator (groupIndex and groups) * adds testProvider to testContracts group * removes unused import in testMarket
codex-storage · Dec 14, 2024 · 92a0eda · 92a0eda
1 parent 1f49f86
commit 92a0eda
Show file tree

Hide file tree

Showing 20 changed files with 1,005 additions and 98 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -11,7 +11,6 @@ env:
   cache_nonce: 0 # Allows for easily busting actions/cache caches
   nim_version: pinned
 
-
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
   cancel-in-progress: true
@@ -23,23 +22,23 @@ jobs:
       matrix: ${{ steps.matrix.outputs.matrix }}
       cache_nonce: ${{ env.cache_nonce }}
     steps:
-    - name: Compute matrix
-      id: matrix
-      uses: fabiocaccamo/create-matrix-action@v4
-      with:
-        matrix: |
-          os {linux},   cpu {amd64}, builder {ubuntu-20.04},   tests {unittest},    nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
-          os {linux},   cpu {amd64}, builder {ubuntu-20.04},   tests {contract},    nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
-          os {linux},   cpu {amd64}, builder {ubuntu-20.04},   tests {integration}, nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
-          os {linux},   cpu {amd64}, builder {ubuntu-20.04},   tests {tools},       nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
-          os {macos},   cpu {amd64}, builder {macos-13},       tests {unittest},    nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
-          os {macos},   cpu {amd64}, builder {macos-13},       tests {contract},    nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
-          os {macos},   cpu {amd64}, builder {macos-13},       tests {integration}, nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
-          os {macos},   cpu {amd64}, builder {macos-13},       tests {tools},       nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
-          os {windows}, cpu {amd64}, builder {windows-latest}, tests {unittest},    nim_version {${{ env.nim_version }}}, shell {msys2}
-          os {windows}, cpu {amd64}, builder {windows-latest}, tests {contract},    nim_version {${{ env.nim_version }}}, shell {msys2}
-          os {windows}, cpu {amd64}, builder {windows-latest}, tests {integration}, nim_version {${{ env.nim_version }}}, shell {msys2}
-          os {windows}, cpu {amd64}, builder {windows-latest}, tests {tools},       nim_version {${{ env.nim_version }}}, shell {msys2}
+      - name: Compute matrix
+        id: matrix
+        uses: fabiocaccamo/create-matrix-action@v4
+        with:
+          matrix: |
+            os {linux},   cpu {amd64}, builder {ubuntu-20.04},   tests {unittest},    nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
+            os {linux},   cpu {amd64}, builder {ubuntu-20.04},   tests {contract},    nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
+            os {linux},   cpu {amd64}, builder {ubuntu-20.04},   tests {integration}, nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
+            os {linux},   cpu {amd64}, builder {ubuntu-20.04},   tests {tools},       nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
+            os {macos},   cpu {amd64}, builder {macos-13},       tests {unittest},    nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
+            os {macos},   cpu {amd64}, builder {macos-13},       tests {contract},    nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
+            os {macos},   cpu {amd64}, builder {macos-13},       tests {integration}, nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
+            os {macos},   cpu {amd64}, builder {macos-13},       tests {tools},       nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
+            os {windows}, cpu {amd64}, builder {windows-latest}, tests {unittest},    nim_version {${{ env.nim_version }}}, shell {msys2}
+            os {windows}, cpu {amd64}, builder {windows-latest}, tests {contract},    nim_version {${{ env.nim_version }}}, shell {msys2}
+            os {windows}, cpu {amd64}, builder {windows-latest}, tests {integration}, nim_version {${{ env.nim_version }}}, shell {msys2}
+            os {windows}, cpu {amd64}, builder {windows-latest}, tests {tools},       nim_version {${{ env.nim_version }}}, shell {msys2}
 
   build:
     needs: matrix

diff --git a/build.nims b/build.nims
@@ -41,6 +41,9 @@ task testContracts, "Build & run Codex Contract tests":
 task testIntegration, "Run integration tests":
   buildBinary "codex", params = "-d:chronicles_runtime_filtering -d:chronicles_log_level=TRACE -d:codex_enable_proof_failures=true"
   test "testIntegration"
+  # use params to enable logging from the integration test executable
+  # test "testIntegration", params = "-d:chronicles_sinks=textlines[notimestamps,stdout],textlines[dynamic] " &
+  #   "-d:chronicles_enabled_topics:integration:TRACE"  
 
 task build, "build codex binary":
   codexTask()

diff --git a/codex/contracts.nim b/codex/contracts.nim
@@ -2,8 +2,10 @@ import contracts/requests
 import contracts/marketplace
 import contracts/market
 import contracts/interactions
+import contracts/provider
 
 export requests
 export marketplace
 export market
 export interactions
+export provider
diff --git a/codex/contracts/market.nim b/codex/contracts/market.nim
@@ -1,6 +1,4 @@
-import std/sequtils
 import std/strutils
-import std/sugar
 import pkg/ethers
 import pkg/upraises
 import pkg/questionable
@@ -9,6 +7,7 @@ import ../logutils
 import ../market
 import ./marketplace
 import ./proofs
+import ./provider
 
 export market
 
@@ -467,18 +466,49 @@ method subscribeProofSubmission*(market: OnChainMarket,
 method unsubscribe*(subscription: OnChainMarketSubscription) {.async.} =
   await subscription.eventSubscription.unsubscribe()
 
-method queryPastEvents*[T: MarketplaceEvent](
+method queryPastSlotFilledEvents*(
   market: OnChainMarket,
-  _: type T,
-  blocksAgo: int): Future[seq[T]] {.async.} =
+  fromBlock: BlockTag): Future[seq[SlotFilled]] {.async.} =
 
   convertEthersError:
-    let contract = market.contract
-    let provider = contract.provider
+    return await market.contract.queryFilter(SlotFilled,
+                                             fromBlock,
+                                             BlockTag.latest)
 
-    let head = await provider.getBlockNumber()
-    let fromBlock = BlockTag.init(head - blocksAgo.abs.u256)
+method queryPastSlotFilledEvents*(
+  market: OnChainMarket,
+  blocksAgo: int): Future[seq[SlotFilled]] {.async.} =
+
+  convertEthersError:
+    let fromBlock =
+      await market.contract.provider.pastBlockTag(blocksAgo)
+
+    return await market.queryPastSlotFilledEvents(fromBlock)
+
+method queryPastSlotFilledEvents*(
+  market: OnChainMarket,
+  fromTime: SecondsSince1970): Future[seq[SlotFilled]] {.async.} =
+
+  convertEthersError:
+    let fromBlock = 
+      await market.contract.provider.blockNumberForEpoch(fromTime)
+    return await market.queryPastSlotFilledEvents(BlockTag.init(fromBlock))
+
+method queryPastStorageRequestedEvents*(
+  market: OnChainMarket,
+  fromBlock: BlockTag): Future[seq[StorageRequested]] {.async.} =
+
+  convertEthersError:
+    return await market.contract.queryFilter(StorageRequested,
+                                             fromBlock,
+                                             BlockTag.latest)
+
+method queryPastStorageRequestedEvents*(
+  market: OnChainMarket,
+  blocksAgo: int): Future[seq[StorageRequested]] {.async.} =
+
+  convertEthersError:
+    let fromBlock =
+      await market.contract.provider.pastBlockTag(blocksAgo)
 
-    return await contract.queryFilter(T,
-                                      fromBlock,
-                                      BlockTag.latest)
+    return await market.queryPastStorageRequestedEvents(fromBlock)
diff --git a/codex/contracts/provider.nim b/codex/contracts/provider.nim
@@ -0,0 +1,126 @@
+import pkg/ethers/provider
+import pkg/chronos
+import pkg/questionable
+
+import ../logutils
+
+from ../clock import SecondsSince1970
+
+logScope:
+  topics = "marketplace onchain provider"
+
+proc raiseProviderError(message: string) {.raises: [ProviderError].} =
+  raise newException(ProviderError, message)
+
+proc blockNumberAndTimestamp*(provider: Provider, blockTag: BlockTag):
+    Future[(UInt256, UInt256)] {.async: (raises: [ProviderError]).} =
+  without latestBlock =? await provider.getBlock(blockTag):
+    raiseProviderError("Could not get latest block")
+
+  without latestBlockNumber =? latestBlock.number:
+    raiseProviderError("Could not get latest block number")
+
+  return (latestBlockNumber, latestBlock.timestamp)
+
+proc binarySearchFindClosestBlock(
+    provider: Provider,
+    epochTime: int,
+    low: UInt256,
+    high: UInt256): Future[UInt256] {.async: (raises: [ProviderError]).} =
+  let (_, lowTimestamp) =
+    await provider.blockNumberAndTimestamp(BlockTag.init(low))
+  let (_, highTimestamp) =
+    await provider.blockNumberAndTimestamp(BlockTag.init(high))
+  if abs(lowTimestamp.truncate(int) - epochTime) <
+      abs(highTimestamp.truncate(int) - epochTime):
+    return low
+  else:
+    return high
+
+proc binarySearchBlockNumberForEpoch(
+    provider: Provider,
+    epochTime: UInt256,
+    latestBlockNumber: UInt256,
+    earliestBlockNumber: UInt256): Future[UInt256]
+      {.async: (raises: [ProviderError]).} =
+  var low = earliestBlockNumber
+  var high = latestBlockNumber
+
+  while low <= high:
+    if low == 0 and high == 0:
+      return low
+    let mid = (low + high) div 2
+    let (midBlockNumber, midBlockTimestamp) =
+      await provider.blockNumberAndTimestamp(BlockTag.init(mid))
+
+    if midBlockTimestamp < epochTime:
+      low = mid + 1
+    elif midBlockTimestamp > epochTime:
+      high = mid - 1
+    else:
+      return midBlockNumber
+  # NOTICE that by how the binary search is implemented, when it finishes
+  # low is always greater than high - this is why we use high, where
+  # intuitively we would use low:
+  await provider.binarySearchFindClosestBlock(
+    epochTime.truncate(int), low=high, high=low)
+
+proc blockNumberForEpoch*(
+    provider: Provider,
+    epochTime: SecondsSince1970): Future[UInt256]
+      {.async: (raises: [ProviderError]).} =
+  let epochTimeUInt256 = epochTime.u256
+  let (latestBlockNumber, latestBlockTimestamp) = 
+    await provider.blockNumberAndTimestamp(BlockTag.latest)
+  let (earliestBlockNumber, earliestBlockTimestamp) = 
+    await provider.blockNumberAndTimestamp(BlockTag.earliest)
+
+  # Initially we used the average block time to predict
+  # the number of blocks we need to look back in order to find
+  # the block number corresponding to the given epoch time. 
+  # This estimation can be highly inaccurate if block time
+  # was changing in the past or is fluctuating and therefore
+  # we used that information initially only to find out
+  # if the available history is long enough to perform effective search.
+  # It turns out we do not have to do that. There is an easier way.
+  #
+  # First we check if the given epoch time equals the timestamp of either
+  # the earliest or the latest block. If it does, we just return the
+  # block number of that block.
+  #
+  # Otherwise, if the earliest available block is not the genesis block, 
+  # we should check the timestamp of that earliest block and if it is greater
+  # than the epoch time, we should issue a warning and return
+  # that earliest block number.
+  # In all other cases, thus when the earliest block is not the genesis
+  # block but its timestamp is not greater than the requested epoch time, or
+  # if the earliest available block is the genesis block, 
+  # (which means we have the whole history available), we should proceed with
+  # the binary search.
+  #
+  # Additional benefit of this method is that we do not have to rely
+  # on the average block time, which not only makes the whole thing
+  # more reliable, but also easier to test.
+
+  # Are lucky today?
+  if earliestBlockTimestamp == epochTimeUInt256:
+    return earliestBlockNumber
+  if latestBlockTimestamp == epochTimeUInt256:
+    return latestBlockNumber
+
+  if earliestBlockNumber > 0 and earliestBlockTimestamp > epochTimeUInt256:
+    let availableHistoryInDays = 
+        (latestBlockTimestamp - earliestBlockTimestamp) div
+          1.days.secs.u256
+    warn "Short block history detected.", earliestBlockTimestamp =  
+      earliestBlockTimestamp, days = availableHistoryInDays
+    return earliestBlockNumber
+
+  return await provider.binarySearchBlockNumberForEpoch(
+    epochTimeUInt256, latestBlockNumber, earliestBlockNumber)
+
+proc pastBlockTag*(provider: Provider,
+                   blocksAgo: int):
+                     Future[BlockTag] {.async: (raises: [ProviderError]).} =
+  let head = await provider.getBlockNumber()
+  return BlockTag.init(head - blocksAgo.abs.u256)
diff --git a/codex/market.nim b/codex/market.nim
@@ -246,8 +246,27 @@ method subscribeProofSubmission*(market: Market,
 method unsubscribe*(subscription: Subscription) {.base, async, upraises:[].} =
   raiseAssert("not implemented")
 
-method queryPastEvents*[T: MarketplaceEvent](
-  market: Market,
-  _: type T,
-  blocksAgo: int): Future[seq[T]] {.base, async.} =
+method queryPastSlotFilledEvents*(
+    market: Market,
+    fromBlock: BlockTag): Future[seq[SlotFilled]] {.base, async.} =
+  raiseAssert("not implemented")
+
+method queryPastSlotFilledEvents*(
+    market: Market,
+    blocksAgo: int): Future[seq[SlotFilled]] {.base, async.} =
+  raiseAssert("not implemented")
+
+method queryPastSlotFilledEvents*(
+    market: Market,
+    fromTime: SecondsSince1970): Future[seq[SlotFilled]] {.base, async.} =
+  raiseAssert("not implemented")
+
+method queryPastStorageRequestedEvents*(
+    market: Market,
+    fromBlock: BlockTag): Future[seq[StorageRequested]] {.base, async.} =
+  raiseAssert("not implemented")
+
+method queryPastStorageRequestedEvents*(
+    market: Market,
+    blocksAgo: int): Future[seq[StorageRequested]] {.base, async.} =
   raiseAssert("not implemented")
diff --git a/codex/validation.nim b/codex/validation.nim
@@ -23,6 +23,9 @@ type
     proofTimeout: UInt256
     config: ValidationConfig
 
+const
+  MaxStorageRequestDuration = 30.days
+
 logScope:
   topics = "codex validator"
 
@@ -56,15 +59,15 @@ func maxSlotsConstraintRespected(validation: Validation): bool =
     validation.slots.len < validation.config.maxSlots
 
 func shouldValidateSlot(validation: Validation, slotId: SlotId): bool =
-  if (validationGroups =? validation.config.groups):
-    (groupIndexForSlotId(slotId, validationGroups) ==
-    validation.config.groupIndex) and
-    validation.maxSlotsConstraintRespected
-  else:
-    validation.maxSlotsConstraintRespected
+  without validationGroups =? validation.config.groups:
+    return true
+  groupIndexForSlotId(slotId, validationGroups) ==
+    validation.config.groupIndex
 
 proc subscribeSlotFilled(validation: Validation) {.async.} =
   proc onSlotFilled(requestId: RequestId, slotIndex: UInt256) =
+    if not validation.maxSlotsConstraintRespected:
+      return
     let slotId = slotId(requestId, slotIndex)
     if validation.shouldValidateSlot(slotId):
       trace "Adding slot", slotId
@@ -78,7 +81,7 @@ proc removeSlotsThatHaveEnded(validation: Validation) {.async.} =
   for slotId in slots:
     let state = await validation.market.slotState(slotId)
     if state != SlotState.Filled:
-      trace "Removing slot", slotId
+      trace "Removing slot", slotId, slotState = state
       ended.incl(slotId)
   validation.slots.excl(ended)
 
@@ -119,14 +122,37 @@ proc run(validation: Validation) {.async.} =
   except CatchableError as e:
     error "Validation failed", msg = e.msg
 
+proc epochForDurationBackFromNow(validation: Validation,
+    duration: Duration): SecondsSince1970 =
+  return validation.clock.now - duration.secs
+
+proc restoreHistoricalState(validation: Validation) {.async.} =
+  trace "Restoring historical state..."
+  let startTimeEpoch = validation.epochForDurationBackFromNow(MaxStorageRequestDuration)
+  let slotFilledEvents = await validation.market.queryPastSlotFilledEvents(
+    fromTime = startTimeEpoch)
+  for event in slotFilledEvents:
+    if not validation.maxSlotsConstraintRespected:
+      break
+    let slotId = slotId(event.requestId, event.slotIndex)
+    let slotState = await validation.market.slotState(slotId)
+    if slotState == SlotState.Filled and validation.shouldValidateSlot(slotId):
+      trace "Adding slot [historical]", slotId
+      validation.slots.incl(slotId)
+  trace "Historical state restored", numberOfSlots = validation.slots.len
+
 proc start*(validation: Validation) {.async.} =
+  trace "Starting validator", groups = validation.config.groups,
+    groupIndex = validation.config.groupIndex
   validation.periodicity = await validation.market.periodicity()
   validation.proofTimeout = await validation.market.proofTimeout()
   await validation.subscribeSlotFilled()
+  await validation.restoreHistoricalState()
   validation.running = validation.run()
 
 proc stop*(validation: Validation) {.async.} =
-  await validation.running.cancelAndWait()
+  if not isNil(validation.running):
+    await validation.running.cancelAndWait()
   while validation.subscriptions.len > 0:
     let subscription = validation.subscriptions.pop()
     await subscription.unsubscribe()