Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Turbopack] track sizes and histogram #75112

Draft
wants to merge 6 commits into
base: canary
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions turbopack/crates/turbo-tasks-auto-hash-map/src/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,14 @@ impl<K, V, H, const I: usize> AutoMap<K, V, H, I> {
}
}

/// Reports the capacity of whichever backing representation (list or hash map) is
/// currently active.
///
/// see [HashMap::capacity](https://doc.rust-lang.org/std/collections/struct.HashMap.html#method.capacity)
pub fn capacity(&self) -> usize {
    match self {
        AutoMap::Map(table) => table.capacity(),
        AutoMap::List(entries) => entries.capacity(),
    }
}

/// see [HashMap::values_mut](https://doc.rust-lang.org/std/collections/struct.HashMap.html#method.values_mut)
pub fn values_mut(&mut self) -> ValuesMut<'_, K, V> {
match self {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,41 @@ impl DynamicStorage {
self.get_map(ty).map(|m| m.len()).unwrap_or_default()
}

/// Total number of entries, summed over every per-type storage in `self.map`.
pub fn len(&self) -> usize {
    let mut total = 0;
    for storage in self.map.iter() {
        total += storage.len();
    }
    total
}

/// Total entry capacity, summed over every per-type storage in `self.map`.
pub fn capacity(&self) -> usize {
    let mut total = 0;
    for storage in self.map.iter() {
        total += storage.capacity();
    }
    total
}

/// Estimated size in bytes of the stored data.
///
/// Adds up the `size()` of every per-type storage and one `CachedDataItemStorage` slot for
/// each element of `self.map`.
pub fn size(&self) -> usize {
    let content_bytes: usize = self.map.iter().map(|storage| storage.size()).sum();
    let slot_bytes = self.map.len() * size_of::<CachedDataItemStorage>();
    content_bytes + slot_bytes
}

/// Estimated size in bytes attributed to the storage of type `ty`.
///
/// That is the storage's own `size()` plus one `CachedDataItemStorage` slot, or `0` when no
/// storage exists for `ty`.
pub fn size_of_type(&self, ty: CachedDataItemType) -> usize {
    self.get_map(ty)
        .map_or(0, |storage| storage.size() + size_of::<CachedDataItemStorage>())
}

/// Estimated size in bytes of the reserved capacity.
///
/// Adds up the `capacity_size()` of every per-type storage and one `CachedDataItemStorage`
/// slot for each element `self.map` has capacity for.
pub fn capacity_size(&self) -> usize {
    let content_bytes: usize = self
        .map
        .iter()
        .map(|storage| storage.capacity_size())
        .sum();
    let slot_bytes = self.map.capacity() * size_of::<CachedDataItemStorage>();
    content_bytes + slot_bytes
}

/// Iterates over the key/value pairs stored for type `ty`.
///
/// Yields nothing when there is no storage for `ty`.
pub fn iter(
    &self,
    ty: CachedDataItemType,
) -> impl Iterator<Item = (CachedDataItemKey, CachedDataItemValueRef<'_>)> {
    self.get_map(ty)
        .into_iter()
        .flat_map(|storage| storage.iter())
}

/// Iterates over the item type of every per-type storage currently held in `self.map`.
pub fn types(&self) -> impl Iterator<Item = CachedDataItemType> + '_ {
    let storages = self.map.iter();
    storages.map(|entry| entry.ty())
}

pub fn iter_all(
&self,
) -> impl Iterator<Item = (CachedDataItemKey, CachedDataItemValueRef<'_>)> {
Expand Down
38 changes: 38 additions & 0 deletions turbopack/crates/turbo-tasks-backend/src/backend/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -793,6 +793,44 @@ impl<B: BackingStorage> TurboTasksBackendInner<B> {
}

fn stop(&self) {
// Print storage statistics: overall byte sizes, task count and entry counts, followed by a
// per-type count histogram and a per-type byte-size breakdown.
{
    let size = self.storage.size();
    let capacity = self.storage.capacity_size();
    let tasks = self.storage.tasks();
    let data_len = self.storage.data_len();
    let data_capacity = self.storage.data_capacity();
    // Byte sizes are reported in whole MiB, entry counts in whole millions (integer division).
    println!(
        "Storage size {}MiB, capacity {}MiB, {} tasks\nData entries {}M, capacity {}M",
        size / 1024 / 1024,
        capacity / 1024 / 1024,
        tasks,
        data_len / 1000 / 1000,
        data_capacity / 1000 / 1000
    );
    {
        let count_histograms = self.storage.count_histogram();
        // Sort by item type so the output order is stable between runs.
        let mut count_histograms = count_histograms.into_iter().collect::<Vec<_>>();
        count_histograms.sort_by_key(|(key, _)| *key);
        for (key, mut histogram) in count_histograms {
            // Tasks that recorded no sample for this type are counted in the zero bucket.
            histogram.add_zero_by_total(tasks);
            println!("### {:?} count\n{:?}", key, histogram);
        }
    }
    {
        let sizes = self.storage.size_by_type();
        // Sort by item type so the output order is stable between runs.
        let mut sizes = sizes.into_iter().collect::<Vec<_>>();
        sizes.sort_by_key(|(key, _)| *key);
        for (key, size) in sizes {
            println!(
                "{:?} = {} {:.2}MiB",
                key,
                size,
                size as f64 / 1024.0 / 1024.0
            );
        }
    }
}

if let Err(err) = self.backing_storage.shutdown() {
println!("Shutting down failed: {}", err);
}
Expand Down
128 changes: 128 additions & 0 deletions turbopack/crates/turbo-tasks-backend/src/backend/storage.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::{
collections::HashMap,
hash::{BuildHasherDefault, Hash},
ops::{Deref, DerefMut},
thread::available_parallelism,
Expand All @@ -15,6 +16,7 @@ use crate::{
CachedDataItemValue, CachedDataItemValueRef, CachedDataItemValueRefMut, OutputValue,
},
data_storage::{AutoMapStorage, OptionStorage},
histogram::Histogram,
utils::dash_map_multi::{get_multiple_mut, RefMut},
};

Expand Down Expand Up @@ -141,6 +143,80 @@ impl InnerStorage {
}
}

/// Number of entries held for this task: the dynamic storage plus the dedicated
/// `aggregation_number`, `output`, `upper` and `output_dependent` storages.
pub fn len(&self) -> usize {
    use crate::data_storage::Storage;
    let counts = [
        self.dynamic.len(),
        self.aggregation_number.len(),
        self.output.len(),
        self.upper.len(),
        self.output_dependent.len(),
    ];
    counts.iter().sum()
}

/// Entry capacity of this task: the dynamic storage plus the dedicated
/// `aggregation_number`, `output`, `upper` and `output_dependent` storages.
pub fn capacity(&self) -> usize {
    use crate::data_storage::Storage;
    let capacities = [
        self.dynamic.capacity(),
        self.aggregation_number.capacity(),
        self.output.capacity(),
        self.upper.capacity(),
        self.output_dependent.capacity(),
    ];
    capacities.iter().sum()
}

/// Estimated size in bytes of the data held for this task, summed over the dynamic storage
/// and the dedicated `aggregation_number`, `output`, `upper` and `output_dependent` storages.
pub fn size(&self) -> usize {
    use crate::data_storage::Storage;
    let sizes = [
        self.dynamic.size(),
        self.aggregation_number.size(),
        self.output.size(),
        self.upper.size(),
        self.output_dependent.size(),
    ];
    sizes.iter().sum()
}

/// Estimated size in bytes of the capacity reserved for this task, summed over the dynamic
/// storage and the dedicated `aggregation_number`, `output`, `upper` and `output_dependent`
/// storages.
pub fn capacity_size(&self) -> usize {
    use crate::data_storage::Storage;
    let sizes = [
        self.dynamic.capacity_size(),
        self.aggregation_number.capacity_size(),
        self.output.capacity_size(),
        self.upper.capacity_size(),
        self.output_dependent.capacity_size(),
    ];
    sizes.iter().sum()
}

/// Records this task's per-type entry counts into `histogram`.
///
/// One sample is added for every type present in the dynamic storage, and one sample for
/// each of the four dedicated storages (using its current `len()`).
pub fn count_histogram(&self, histogram: &mut HashMap<CachedDataItemType, Histogram>) {
    use crate::data_storage::Storage;
    for ty in self.dynamic.types() {
        let count = self.dynamic.count(ty);
        histogram.entry(ty).or_default().add(count);
    }
    let dedicated = [
        (
            CachedDataItemType::AggregationNumber,
            self.aggregation_number.len(),
        ),
        (CachedDataItemType::Output, self.output.len()),
        (CachedDataItemType::Upper, self.upper.len()),
        (
            CachedDataItemType::OutputDependent,
            self.output_dependent.len(),
        ),
    ];
    for (ty, count) in dedicated {
        histogram.entry(ty).or_default().add(count);
    }
}

/// Adds this task's estimated byte size per item type onto the running totals in `sizes`.
///
/// Types in the dynamic storage contribute `size_of_type`; the four dedicated storages
/// contribute their `size()`.
pub fn size_by_type(&self, sizes: &mut HashMap<CachedDataItemType, usize>) {
    use crate::data_storage::Storage;
    for ty in self.dynamic.types() {
        let bytes = self.dynamic.size_of_type(ty);
        *sizes.entry(ty).or_default() += bytes;
    }
    let dedicated = [
        (
            CachedDataItemType::AggregationNumber,
            self.aggregation_number.size(),
        ),
        (CachedDataItemType::Output, self.output.size()),
        (CachedDataItemType::Upper, self.upper.size()),
        (
            CachedDataItemType::OutputDependent,
            self.output_dependent.size(),
        ),
    ];
    for (ty, bytes) in dedicated {
        *sizes.entry(ty).or_default() += bytes;
    }
}

/// Borrows the `persistance_state` field of this task's storage.
pub fn persistance_state(&self) -> &PersistanceState {
&self.persistance_state
}
Expand Down Expand Up @@ -505,6 +581,58 @@ pub struct Storage {
map: DashMap<TaskId, Box<InnerStorage>, BuildHasherDefault<FxHasher>>,
}

/// Entry-count and memory statistics aggregated over every task in the storage.
impl Storage {
    /// Sum of [`InnerStorage::len`] over all tasks.
    pub fn data_len(&self) -> usize {
        let mut entries = 0;
        for task in self.map.iter() {
            entries += task.value().len();
        }
        entries
    }

    /// Sum of [`InnerStorage::capacity`] over all tasks.
    pub fn data_capacity(&self) -> usize {
        let mut capacity = 0;
        for task in self.map.iter() {
            capacity += task.value().capacity();
        }
        capacity
    }

    /// Estimated size in bytes: every task's [`InnerStorage::size`] plus, per stored task,
    /// the size of a `(TaskId, Box<InnerStorage>, InnerStorage)` tuple.
    pub fn size(&self) -> usize {
        let contents: usize = self.map.iter().map(|task| task.value().size()).sum();
        let per_task = size_of::<(TaskId, Box<InnerStorage>, InnerStorage)>();
        contents + self.map.len() * per_task
    }

    /// Estimated reserved size in bytes: every task's [`InnerStorage::capacity_size`] plus,
    /// per slot of map capacity, the size of a `(TaskId, Box<InnerStorage>, InnerStorage)`
    /// tuple.
    pub fn capacity_size(&self) -> usize {
        let contents: usize = self
            .map
            .iter()
            .map(|task| task.value().capacity_size())
            .sum();
        let per_task = size_of::<(TaskId, Box<InnerStorage>, InnerStorage)>();
        contents + self.map.capacity() * per_task
    }

    /// Number of tasks currently in the map.
    pub fn tasks(&self) -> usize {
        self.map.len()
    }

    /// Builds, per item type, a histogram of how many entries each task holds.
    pub fn count_histogram(&self) -> HashMap<CachedDataItemType, Histogram> {
        self.map.iter().fold(HashMap::new(), |mut acc, task| {
            task.value().count_histogram(&mut acc);
            acc
        })
    }

    /// Builds, per item type, the estimated byte size summed over all tasks.
    pub fn size_by_type(&self) -> HashMap<CachedDataItemType, usize> {
        self.map.iter().fold(HashMap::new(), |mut acc, task| {
            task.value().size_by_type(&mut acc);
            acc
        })
    }
}

impl Storage {
pub fn new() -> Self {
let shard_amount =
Expand Down
27 changes: 27 additions & 0 deletions turbopack/crates/turbo-tasks-backend/src/data_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ pub trait Storage {
/// NOTE(review): presumably releases unused capacity; no implementation is visible in this
/// hunk — confirm against the implementors.
fn shrink_to_fit(&mut self);
/// NOTE(review): presumably `true` when the storage holds no entries — confirm against the
/// implementors.
fn is_empty(&self) -> bool;
/// Number of entries currently stored.
fn len(&self) -> usize;
/// Number of entries the storage has room for (the backing map's capacity for
/// `AutoMapStorage`, the constant `1` for `OptionStorage`).
fn capacity(&self) -> usize;
/// Estimated size in bytes attributed to the stored entries (`len * size_of::<(K, V)>()` for
/// `AutoMapStorage`, `0` for `OptionStorage`).
fn size(&self) -> usize;
/// Estimated size in bytes attributed to the reserved capacity
/// (`capacity * size_of::<(K, V)>()` for `AutoMapStorage`, `0` for `OptionStorage`).
fn capacity_size(&self) -> usize;
/// Iterates over the stored entries.
fn iter(&self) -> Self::Iterator<'_>;
}

Expand Down Expand Up @@ -100,6 +103,18 @@ impl<V> Storage for OptionStorage<V> {
}
}

/// Always `1`: this storage wraps a single optional value.
fn capacity(&self) -> usize {
1
}

/// Always `0` bytes.
///
/// NOTE(review): presumably because the value is stored inline in its owner and is already
/// covered by the owner's `size_of` — confirm.
fn size(&self) -> usize {
0
}

/// Always `0` bytes.
///
/// NOTE(review): presumably because the value is stored inline in its owner and reserves
/// no separate capacity — confirm.
fn capacity_size(&self) -> usize {
0
}

/// Iterates over the stored value, if any, after mapping it through `value_to_key_value`;
/// yields at most one item.
fn iter(&self) -> Self::Iterator<'_> {
self.value.as_ref().map(value_to_key_value).into_iter()
}
Expand Down Expand Up @@ -190,6 +205,18 @@ impl<K: Hash + Eq, V> Storage for AutoMapStorage<K, V> {
self.map.len()
}

/// Capacity of the backing map, in entries.
fn capacity(&self) -> usize {
self.map.capacity()
}

/// Estimated size in bytes of the stored entries: one `(K, V)` pair per entry.
fn size(&self) -> usize {
    let entry_bytes = size_of::<(K, V)>();
    self.map.len() * entry_bytes
}

/// Estimated size in bytes of the reserved capacity: one `(K, V)` pair per slot of capacity.
fn capacity_size(&self) -> usize {
    let entry_bytes = size_of::<(K, V)>();
    self.map.capacity() * entry_bytes
}

/// Iterates over the entries of the backing map.
fn iter(&self) -> Self::Iterator<'_> {
self.map.iter()
}
Expand Down
52 changes: 52 additions & 0 deletions turbopack/crates/turbo-tasks-backend/src/histogram.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
use core::fmt;
use std::fmt::{Debug, Formatter};

/// Power-of-two histogram over `usize` samples.
///
/// Bucket `0` counts samples equal to zero. Bucket `i` (for `i >= 1`) counts samples in the
/// range `2^(i-1) ..= 2^i - 1`. The last bucket additionally absorbs every larger sample, so
/// recording never indexes out of bounds.
#[derive(Copy, Clone)]
pub struct Histogram {
    buckets: [usize; 64],
}

impl Default for Histogram {
    /// Same as [`Histogram::new`]: all buckets start at zero.
    fn default() -> Self {
        Self::new()
    }
}

impl Histogram {
    /// Creates an empty histogram.
    pub fn new() -> Self {
        Self { buckets: [0; 64] }
    }

    /// Records one sample of `value`.
    pub fn add(&mut self, value: usize) {
        let last = self.buckets.len() - 1;
        let bucket = if value == 0 {
            0
        } else {
            value.ilog2() as usize + 1
        };
        // `ilog2() + 1` reaches 64 for values >= 2^63, one past the array; saturate those
        // samples into the last bucket instead of panicking.
        self.buckets[bucket.min(last)] += 1;
    }

    /// Tops up the zero bucket so that the number of recorded samples reaches `total`.
    ///
    /// Does nothing when `total` or more samples have already been recorded.
    pub fn add_zero_by_total(&mut self, total: usize) {
        let recorded = self.buckets.iter().sum::<usize>();
        self.buckets[0] += total.saturating_sub(recorded);
    }
}

impl Debug for Histogram {
    /// Writes one `lower - upper: count` line per non-empty bucket.
    ///
    /// Counts of 1000 and above are abbreviated with a `k` or `M` suffix.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let last = self.buckets.len() - 1;
        for (i, &count) in self.buckets.iter().enumerate() {
            if count == 0 {
                continue;
            }
            // Bounds are computed in `u64`: an untyped `1 << i` would default to `i32` and
            // overflow for bucket indices of 31 and above.
            let (lower, upper) = match i {
                0 => (0u64, 0u64),
                // The last bucket is open-ended because `add` saturates into it.
                _ if i == last => (1u64 << (i - 1), u64::MAX),
                _ => (1u64 << (i - 1), (1u64 << i) - 1),
            };
            // The zero row uses the same column width as every other row.
            write!(f, "{:7} - {:7}: ", lower, upper)?;
            if count < 1000 {
                writeln!(f, "{}", count)?;
            } else if count < 1000000 {
                writeln!(f, "{:.2}k", count as f64 / 1000.0)?;
            } else {
                writeln!(f, "{:.2}M", count as f64 / 1000000.0)?;
            }
        }
        Ok(())
    }
}
1 change: 1 addition & 0 deletions turbopack/crates/turbo-tasks-backend/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ mod backing_storage;
mod data;
mod data_storage;
mod database;
pub mod histogram;
mod kv_backing_storage;
mod utils;

Expand Down
Loading
Loading