diff --git a/Cargo.lock b/Cargo.lock index c4c3461..4b3ecee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "anstyle" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" + [[package]] name = "anyhow" version = "1.0.86" @@ -63,6 +69,12 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + [[package]] name = "cc" version = "1.1.5" @@ -75,6 +87,46 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "clap" +version = "4.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64acc1846d54c1fe936a78dc189c34e28d3f5afc348403f28ecf53660b9b8462" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fb8393d67ba2e7bfaf28a23458e4e2b543cc73a99595511eb207fdb8aede942" +dependencies = [ + "anstyle", + "clap_lex", + "strsim", + "terminal_size", +] + +[[package]] +name = "clap_derive" +version = "4.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bac35c6dafb060fd4d275d9a4ffae97917c13a6327903a8be2153cd964f7085" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.71", +] + +[[package]] +name = "clap_lex" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" + 
[[package]] name = "collection_literals" version = "1.0.1" @@ -111,6 +163,22 @@ dependencies = [ "syn 2.0.71", ] +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "flate2" version = "1.0.30" @@ -167,6 +235,18 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "idna" version = "0.5.0" @@ -177,6 +257,16 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "indexmap" +version = "2.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "interpolator" version = "0.5.0" @@ -195,6 +285,12 @@ version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + [[package]] 
name = "log" version = "0.4.22" @@ -207,6 +303,7 @@ version = "0.1.0" dependencies = [ "anyhow", "assert_matches", + "clap", "get-size", "rstest", "serde", @@ -215,6 +312,7 @@ dependencies = [ "thousands", "time", "ureq", + "url", ] [[package]] @@ -379,7 +477,7 @@ dependencies = [ "libc", "spin", "untrusted", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -418,6 +516,19 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "0.38.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + [[package]] name = "rustls" version = "0.23.11" @@ -488,6 +599,7 @@ version = "1.0.120" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" dependencies = [ + "indexmap", "itoa", "ryu", "serde", @@ -505,6 +617,12 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "subtle" version = "2.6.1" @@ -532,6 +650,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "terminal_size" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" +dependencies = [ + "rustix", + "windows-sys 0.48.0", +] + [[package]] name = "thiserror" version = "1.0.62" @@ -655,6 +783,7 @@ dependencies = [ "form_urlencoded", "idna", "percent-encoding", + "serde", ] [[package]] @@ -678,13 +807,37 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "windows-sys" 
+version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + [[package]] name = "windows-sys" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", ] [[package]] @@ -693,28 +846,46 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 
+[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -727,24 +898,48 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" diff --git a/Cargo.toml b/Cargo.toml index 947a498..160cacb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "manifest-size" version = "0.1.0" edition = "2021" -rust-version = "1.67" +rust-version = "1.74" description = "Calculate bytes consumed by Zarr manifests" authors = ["John Thorvald Wodder II "] repository = "https://github.com/jwodder/manifest-size" @@ -13,13 +13,15 @@ publish = false [dependencies] anyhow = "1.0.86" +clap = { version = "4.3.24", default-features = false, features = ["derive", "error-context", "help", "std", "suggestions", "usage", "wrap_help"] } get-size = { version = "0.1.4", features = ["derive"] } serde = { version = "1.0.204", features = ["derive"] } -serde_json = "1.0.120" +serde_json = { version = "1.0.120", features = ["preserve_order"] } thiserror = "1.0.62" thousands = "0.2.0" time = { version = "0.3.36", features = ["parsing", "serde"] } ureq = "2.10.0" +url = { version = "2.5.2", features = ["serde"] } [dev-dependencies] assert_matches = "1.5.0" diff --git a/README.md b/README.md index 1f982b4..b3a8dd1 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ [![Project Status: Concept – Minimal or no implementation has been done yet, or the repository is only intended to be a limited example, demo, or proof-of-concept.](https://www.repostatus.org/badges/latest/concept.svg)](https://www.repostatus.org/#concept) -[![Minimum Supported Rust Version](https://img.shields.io/badge/MSRV-1.67-orange)](https://www.rust-lang.org) +[![Minimum Supported Rust 
Version](https://img.shields.io/badge/MSRV-1.74-orange)](https://www.rust-lang.org) [![MIT License](https://img.shields.io/github/license/jwodder/nhmoon.svg)](https://opensource.org/licenses/MIT) This is a Rust utility for measuring the sizes of individual [Zarr manifests][] @@ -29,18 +29,19 @@ with: Usage ===== - manifest-size <url> + manifest-size [-J|--json] <url> or, if running a localized binary: - cargo run [--release] -- <url> + cargo run [--release] -- [-J|--json] <url> -`manifest-size` takes a single argument, an HTTP(S) URL pointing to a Zarr -manifest file. It downloads the file, parses it, and outputs the size of the -data before & after parsing. +`manifest-size` takes a single positional argument, an HTTP(S) URL pointing to +a Zarr manifest file. It downloads the file, parses it, and outputs the size +of the data before & after parsing. If the `-J`/`--json` option is supplied, +the output will be in JSON. -Example -======= +Examples +======== ```console $ manifest-size https://datasets.datalad.org/dandi/zarr-manifests/zarr-manifests-v2-sorted/001/e3b/001e3b6d-26fb-463f-af28-520a25680ab4/326273bcc8730474323a66ea4e3daa49-113328--97037755426.json @@ -48,5 +49,14 @@ Raw response: 13 818 966 bytes Parsed response: 18 473 917 bytes ``` +```console +$ manifest-size --json https://datasets.datalad.org/dandi/zarr-manifests/zarr-manifests-v2-sorted/c7e/25d/c7e25dca-4dc9-4e83-a0d7-5fee56fa8773/c23f15b26134d808b072b8c93b1eeed8-48935--29709893986.json +{ + "url": "https://datasets.datalad.org/dandi/zarr-manifests/zarr-manifests-v2-sorted/c7e/25d/c7e25dca-4dc9-4e83-a0d7-5fee56fa8773/c23f15b26134d808b072b8c93b1eeed8-48935--29709893986.json", + "raw_bytes": 5935826, + "parsed_bytes": 7983735 +} +``` + [Zarr manifests]: https://github.com/dandi/dandidav/blob/main/doc/zarrman.md [`dandidav`]: https://github.com/dandi/dandidav diff --git a/src/main.rs b/src/main.rs index cbdf710..6b294fa 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,27 +5,54 @@ mod component; mod manifest; use
crate::manifest::Manifest; use anyhow::Context; +use clap::Parser; use get_size::GetSize; use std::io::Read; use thousands::Separable; +use url::Url; + +/// Display the raw & parsed sizes of a Zarr manifest +/// +/// See <https://github.com/jwodder/manifest-size> for more information. +#[derive(Clone, Debug, Eq, Parser, PartialEq)] +struct Arguments { + /// Output JSON + #[arg(short = 'J', long)] + json: bool, + + /// An HTTP(S) URL pointing to a Zarr manifest file + url: Url, +} fn main() -> anyhow::Result<()> { - let Some(url) = std::env::args().nth(1) else { - anyhow::bail!("No URL argument supplied"); - }; - let mut r = ureq::get(&url) + let args = Arguments::parse(); + let mut r = ureq::get(args.url.as_str()) .call() .context("GET request failed")? .into_reader(); let mut body = Vec::new(); r.read_to_end(&mut body) .context("failed to read response body")?; - let body_len_str = body.len().separate_with_spaces(); + let body_len = body.len(); let parsed = serde_json::from_slice::<Manifest>(&body).context("failed to deserialize response")?; - let parsed_size_str = parsed.get_size().separate_with_spaces(); - let width = body_len_str.len().max(parsed_size_str.len()); - println!("Raw response: {body_len_str:>width$} bytes"); - println!("Parsed response: {parsed_size_str:>width$} bytes"); + let parsed_size = parsed.get_size(); + drop(parsed); + if args.json { + println!( + "{:#}", + serde_json::json!({ + "url": args.url, + "raw_bytes": body_len, + "parsed_bytes": parsed_size, + }) + ); + } else { + let body_len_str = body_len.separate_with_spaces(); + let parsed_size_str = parsed_size.separate_with_spaces(); + let width = body_len_str.len().max(parsed_size_str.len()); + println!("Raw response: {body_len_str:>width$} bytes"); + println!("Parsed response: {parsed_size_str:>width$} bytes"); + } Ok(()) }