Skip to content

Commit

Permalink
[rust] legacy compatibility for forensic db (#329)
Browse files Browse the repository at this point in the history
  • Loading branch information
sirouk authored Jan 8, 2025
1 parent 2a310b2 commit 362ea77
Show file tree
Hide file tree
Showing 104 changed files with 30,748 additions and 259 deletions.
30 changes: 28 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
resolver = "2"

members = [
"compatibility",
"framework",
"framework/cached-packages",
"smoke-tests",
Expand Down Expand Up @@ -33,6 +34,7 @@ version = "7.0.3"
[workspace.dependencies]
######## Internal crate dependencies ########
# Internal crate dependencies.
libra-backwards-compatibility = { path = "compatibility" }
libra-cached-packages = { path = "framework/cached-packages" }
libra-config = { path = "tools/config" }
libra-framework = { path = "framework" }
Expand Down Expand Up @@ -63,6 +65,7 @@ diem-rest-client = { git = "https://github.com/0LNetworkCommunity/diem.git", bra
diem-sdk = { git = "https://github.com/0LNetworkCommunity/diem.git", branch = "release" }
diem-config = { git = "https://github.com/0LNetworkCommunity/diem.git", branch = "release" }
diem-crypto = { git = "https://github.com/0LNetworkCommunity/diem.git", branch = "release" }
diem-crypto-derive = { git = "https://github.com/0LNetworkCommunity/diem.git", branch = "release" }
diem-genesis = { git = "https://github.com/0LNetworkCommunity/diem.git", branch = "release" }
diem-global-constants = { git = "https://github.com/0LNetworkCommunity/diem.git", branch = "release" }
diem-keygen = { git = "https://github.com/0LNetworkCommunity/diem.git", branch = "release" }
Expand Down Expand Up @@ -337,7 +340,7 @@ move-compiler = { git = "https://github.com/0LNetworkCommunity/diem.git", branch
move-model = { git = "https://github.com/0LNetworkCommunity/diem.git", branch = "release" }
move-vm-test-utils = { git = "https://github.com/0LNetworkCommunity/diem.git", branch = "release" }
move-vm-types = { git = "https://github.com/0LNetworkCommunity/diem.git", branch = "release" }
move-vm-runtime = { git = "https://github.com/0LNetworkCommunity/diem.git", branch = "release" }
move-vm-runtime = { git = "https://github.com/0LNetworkCommunity/diem.git", branch = "release" }

# uses a profile similar to `cli` in Diem/Cargo.toml
# optimized for performance and size
Expand Down
Binary file added compatibility/.DS_Store
Binary file not shown.
37 changes: 37 additions & 0 deletions compatibility/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
[package]
name = "libra-backwards-compatibility"

# Workspace inherited keys
authors = { workspace = true }
version = { workspace = true }
edition = { workspace = true }
homepage = { workspace = true }
license = { workspace = true }
publish = { workspace = true }
repository = { workspace = true }
rust-version = { workspace = true }


[dependencies]
anyhow = { workspace = true }
bcs = { workspace = true }
diem-api-types = { workspace = true }
diem-backup-cli = { workspace = true }
diem-config = { workspace = true }
diem-crypto = { workspace = true }
diem-crypto-derive = { workspace = true }
diem-types = { workspace = true }
move-core-types = { workspace = true }
hex = { workspace = true }
libra-types = { workspace = true }
once_cell = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tokio = { workspace = true }
tiny-keccak = { workspace = true }
bytes = { workspace = true }
serde_bytes = { workspace = true }
rand = { workspace = true }

[dev-dependencies]
diem-temppath = { workspace = true }
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"version":119757649,"root_hash":"d2e6b6c474529cd3c3ea76cf211a528ee28bb13d7bcd8dcbef0ecf4c43bc3703","chunks":[{"first_idx":0,"last_idx":17338,"first_key":"000131122524ba9f4a13bd90a8b13c5d03ab621649c8100c6bbf7846fe8eaf0f","last_key":"fffed52c1dd93cc7aa5b79a8699df0c33eb4d65ab8d2a21bbe79f9d05ad62377","blobs":"state_ver_119757649.17a8/0-.chunk","proof":"state_ver_119757649.17a8/0-17338.proof"}],"proof":"state_ver_119757649.17a8/state.proof"}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"first_version":141722729,"last_version":141722729,"chunks":[{"first_version":141722729,"last_version":141722729,"transactions":"transaction_141722729-.891d/141722729-.chunk","proof":"transaction_141722729-.891d/141722729-141722729.proof"}]}
73 changes: 73 additions & 0 deletions compatibility/src/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@

# Legacy Backwards Compatibility

# TL;DR
The encoding of the bytes in libra uses `BCS` which is a de/serialization implementation using `serde`. Libra Version Six and beyond cannot decode transaction logs or state snapshots from Libra Version Five without these tools.

# Explain

Version Six was a kitchen sink upgrade with a new genesis since there were upgrades throughout the stack that would have created a discontinuity in blocks anyhow. The bytes in db, logs, and backups preceding V6 had slightly different memory layouts.

Since the Move language address format and data structure names have changed, no lookup can be successful because:
1. the keys would have had different hashes, and
2. the value bytes had different encoding layouts.

This means that between V5 and V6 any query tool of the chain state (and Rust internal K-V structure) will fail to match the key or decode the value.

# Principal PITAs

1. Backup Manifests have changed layout. State Snapshot Manifest JSON files have changed ever so slightly: they previously did not include the `epoch` field. Reading V5 backup archive manifests would fail with V6+ tooling.

2. `AccountStateBlob` stored bytes in records are not what they seem. Vendor gifts you this koan: "What's the sound of recursion that goes nowhere?". In the State Snapshot backup files, each chunk is represented by a tuple of `(HashValue, AccountStateBlob)`. However, AccountStateBlob already includes a `hash` field of type HashValue. For reasons, this field was flagged to be skipped by the de/serializer. In practice the bytes at rest are prepended by the hash, not appended. For clarity we added a definition of `AccountStateBlobRecord`.

3. `HashValue` is evil: The HashValue layout has not changed, but it invokes `loop garoo`, and the handcrafted deserializer of `HashValue` uses a different intermediary representation for the byte layout.

```
// V5:
#[derive(::serde::Deserialize)]
#[serde(rename = "HashValue")]
struct Value<'a>(&'a [u8]);
// V6:
struct Value<'a> {
hash: &'a [u8; HashValue::LENGTH],
}
```

4. `AccountAddress` makes everything fail: fixed lengths have changed, from V5 to V6 the addresses doubled in size (16 to 32 bytes). No KV lookup will work because the byte-encoded key always has the Core Code Address (0x1), which changed from being prepended with 31 hex zeros (for 16 bytes) to 63 hex zeros (for 32 bytes). So all language_storage.rs structs are changed to use `LegacyAddressV5`.


## Compatibility Structs and Access Vectors
Structs from V5 with their specific fields, string representations, and Account Address formats have been provided here.

Looking up data in K-V representations of bytes is done with the byte-encoded `access_paths_vector`. The access path is always prepended with the AccountAddress of the core address account (which went from 16 bytes to 32). The access vector also uses the byte-encoded string name of the module; these have changed in the Move specification to lower-cased module names and Pascal-cased struct names.
```
String representations of access path bytes as the lookup key in the K-V.
// V5:
resource_key = "0x00000000000000000000000000000001::DiemAccount::DiemAccount"
// V6:
resource_key = "0x0000000000000000000000000000000000000000000000000000000000000001::diem_account::DiemAccount"
```

This compatibility library ports some V5 Rust code so that certain elemental types will use the correct V5 layout, e.g.: StructTag, TypeTag, HashValue, AccountAddress.


## Troubleshooting Decoding
For any failed `bcs::from_bytes()` you will likely receive a `remaining input` error. Vendor gifts you this koan. This usually means the `key` of the access_vector was not found in the B-tree's bytes, most likely because the V5 access_vector uses fewer bytes than the V6 tools expect (because of the account address character/byte count difference).


# Tests
The principal tests to run are in `state_snapshot_v5.rs`, where we try to sanity test the encoding and decoding of structs for the v5 language elements.


# References

```
$ >
$ > Loop Garoo
Goin' down to junk anew?
$ > Loop Garoo
Goin' put my hook to you
```
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -15,66 +15,46 @@ use super::legacy_address::LegacyAddress;
/// Struct that represents an AutoPay resource
#[derive(Debug, Serialize, Deserialize)]
pub struct AutoPayResource {
///
pub payment: Vec<Payment>,
///
pub prev_bal: u64,
pub payment: Vec<Payment>,
pub prev_bal: u64,
}

/// Struct that represents a view for AutoPay resource
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutoPayView {
///
pub payments: Vec<PaymentView>,
///
pub recurring_sum: u64,
pub payments: Vec<PaymentView>,
pub recurring_sum: u64,
}

/// Autopay instruction
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PaymentView {
///
pub uid: u64,
///
pub in_type: u8,
///
pub type_desc: String,
///
pub payee: LegacyAddress,
///
pub end_epoch: u64,
///
pub prev_bal: u64,
///
pub amt: u64,
///
pub amount: String,
///
pub note: Option<String>,
pub uid: u64,
pub in_type: u8,
pub type_desc: String,
pub payee: LegacyAddress,
pub end_epoch: u64,
pub prev_bal: u64,
pub amt: u64,
pub amount: String,
pub note: Option<String>,
}

impl PaymentView {
///
pub fn is_percent_of_change(&self) -> bool {
pub fn is_percent_of_change(&self) -> bool {
self.in_type == 1u8
}
}

/// Autopay instruction
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Payment {
///
pub uid: u64,
///
pub in_type: u8,
///
pub payee: LegacyAddress,
///
pub end_epoch: u64,
///
pub prev_bal: u64,
///
pub amt: u64,
pub uid: u64,
pub in_type: u8,
pub payee: LegacyAddress,
pub end_epoch: u64,
pub prev_bal: u64,
pub amt: u64,
}

impl Payment {
Expand Down Expand Up @@ -105,32 +85,27 @@ impl Payment {
// impl MoveResource for AutoPayResource {}

impl AutoPayResource {
// ///
// pub fn struct_tag() -> StructTag {
// // pub fn struct_tag() -> StructTag {
// StructTag {
// address: CORE_CODE_ADDRESS,
// module: AutoPayResource::module_identifier(),
// name: AutoPayResource::struct_identifier(),
// type_params: vec![],
// }
// }
// ///
// pub fn access_path(account: LegacyAddress) -> AccessPath {
// // pub fn access_path(account: LegacyAddress) -> AccessPath {
// let resource_key = ResourceKey::new(account, AutoPayResource::struct_tag());
// AccessPath::resource_access_path(resource_key)
// }
// ///
// pub fn resource_path() -> Vec<u8> {
// // pub fn resource_path() -> Vec<u8> {
// AccessPath::resource_access_vec(AutoPayResource::struct_tag())
// }

///
pub fn try_from_bytes(bytes: &[u8]) -> Result<Self> {
pub fn try_from_bytes(bytes: &[u8]) -> Result<Self> {
bcs::from_bytes(bytes).map_err(Into::into)
}

// ///
// pub fn get_view(&self) -> AutoPayView {
// // pub fn get_view(&self) -> AutoPayView {
// let payments = self.payment.iter().map(|each| {
// PaymentView {
// uid: each.uid,
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,12 @@ use serde::{Deserialize, Serialize};
/// Struct that represents a FullnodeCounter resource
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FullnodeCounterResource {
///
pub proofs_submitted_in_epoch: u64,
///
pub proofs_paid_in_epoch: u64,
///
pub subsidy_in_epoch: u64,
///
pub cumulative_proofs_submitted: u64,
///
pub cumulative_proofs_paid: u64,
///
pub cumulative_subsidy: u64,
pub proofs_submitted_in_epoch: u64,
pub proofs_paid_in_epoch: u64,
pub subsidy_in_epoch: u64,
pub cumulative_proofs_submitted: u64,
pub cumulative_proofs_paid: u64,
pub cumulative_subsidy: u64,
}

impl MoveStructType for FullnodeCounterResource {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,15 @@ pub struct GasResource {
}

impl GasResource {
///
pub fn new(value: u64) -> Self {
pub fn new(value: u64) -> Self {
Self { value }
}

///
pub fn value(&self) -> u64 {
pub fn value(&self) -> u64 {
self.value
}

///
pub fn struct_tag() -> StructTag {
pub fn struct_tag() -> StructTag {
StructTag {
address: CORE_CODE_ADDRESS,
name: GasResource::struct_identifier(),
Expand All @@ -44,8 +41,7 @@ impl GasResource {
}
}

///
pub fn access_path_for() -> Vec<u8> {
pub fn access_path_for() -> Vec<u8> {
AccessPath::resource_access_vec(GasResource::struct_tag())
}
}
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit 362ea77

Please sign in to comment.