Skip to content

Commit

Permalink
bump for 2024.2.0 major release (LLNL#212)
Browse files Browse the repository at this point in the history
Update README.md (LLNL#211)

confidence interval init

base impl
  • Loading branch information
slabasan authored and Yejashi committed Oct 16, 2024
1 parent 2b9e450 commit dca8174
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 4 deletions.
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,13 @@

# Thicket

A Python-based toolkit for analyzing ensemble performance data. You can find
detailed documentation, along with tutorials of Thicket in the
A Python-based toolkit for Exploratory Data Analysis (EDA) of parallel performance data
that enables performance optimization and understanding of applications’ performance on
supercomputers. It bridges the performance tool gap between being able to consider only
a single instance of a simulation run (e.g., single platform, single measurement tool,
or single scale) and finding actionable insights in multi-dimensional, multi-scale,
multi-architecture, and multi-tool performance datasets. You can find detailed
documentation and tutorials for Thicket on
[Read the Docs](https://thicket.readthedocs.io/en/latest/).

### Installation
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[tool.poetry]
name = "llnl-thicket"
version = "2024.1.0"
version = "2024.2.0"
description = "A Python-based toolkit for analyzing ensemble performance data."
license = "MIT"

Expand Down
1 change: 1 addition & 0 deletions thicket/stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from .preference import preference
from .distance import bhattacharyya_distance
from .distance import hellinger_distance
from .confidence_interval import confidence_interval


try:
Expand Down
50 changes: 50 additions & 0 deletions thicket/stats/confidence_interval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright 2022 Lawrence Livermore National Security, LLC and other
# Thicket Project Developers. See the top-level LICENSE file for details.
#
# SPDX-License-Identifier: MIT

import numpy as np
import pandas as pd
import scipy.stats as stats

import thicket as th
from ..utils import verify_thicket_structures
from .stats_utils import cache_stats_op
from thicket.stats import mean


@cache_stats_op
def confidence_interval(thicket, columns=None, confidence_value=0.95):
    """Compute a per-node confidence interval for each requested column.

    For every entry in ``columns`` the interval is
    ``mean -/+ z * std / sqrt(n)``, where ``z`` is the standard-normal
    quantile for ``confidence_value`` and ``n`` is the number of rows the
    node has in ``thicket.dataframe``. Each interval is stored as a
    ``(lower, upper)`` tuple in a new column of
    ``thicket.statsframe.dataframe``.

    Arguments:
        thicket (thicket): Thicket object whose statsframe is extended.
        columns (list): Columns of the performance data to compute the
            interval for. Required, despite the ``None`` default.
        confidence_value (float): Confidence level, strictly between 0 and 1.

    Returns:
        (list): Names of the columns added to the statsframe, one per entry
            in ``columns``.

    Raises:
        ValueError: If ``columns`` is None or ``confidence_value`` is not
            strictly between 0 and 1.
    """
    if columns is None:
        raise ValueError(
            "A list of columns must be passed to 'columns' to compute a "
            "confidence interval."
        )
    if not 0 < confidence_value < 1:
        raise ValueError(
            "Value passed to 'confidence_value' must be strictly between 0 and 1."
        )

    # The statsframe is read back below using the names these calls return,
    # so they must run before the statsframe dataframe is looked up.
    mean_cols = th.stats.mean(thicket, columns=columns)
    std_cols = th.stats.std(thicket, columns=columns)

    # Two-sided z score for the requested confidence level.
    z = stats.norm.ppf((1 + confidence_value) / 2)

    stats_df = thicket.statsframe.dataframe

    # Count rows per node in the statsframe's own index order: the counts are
    # combined positionally with Series indexed by that index, so the order
    # has to match it exactly.
    row_slice = pd.IndexSlice
    sample_sizes = np.array(
        [len(thicket.dataframe.loc[row_slice[node, :]]) for node in stats_df.index]
    )
    root_n = np.sqrt(sample_sizes)

    output_column_names = []
    # Assumes mean/std return one name per requested column, in the same
    # order as `columns` -- TODO confirm against th.stats.mean/std.
    for column, mean_col, std_col in zip(columns, mean_cols, std_cols):
        center = stats_df[mean_col]
        margin = z * (stats_df[std_col] / root_n)

        bounds = pd.Series(
            list(zip(center - margin, center + margin)), index=stats_df.index
        )

        if isinstance(column, tuple):
            # Multi-level column: keep the leading level(s) and rename only
            # the innermost one, so the result sits below the first level.
            out_col = column[:-1] + (
                f"confidence_interval_{confidence_value}_{column[-1]}",
            )
        else:
            out_col = f"confidence_interval_{confidence_value}_{column}"

        output_column_names.append(out_col)
        stats_df[out_col] = bounds

    thicket.statsframe.dataframe = stats_df.sort_index(axis=1)
    return output_column_names


2 changes: 1 addition & 1 deletion thicket/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
#
# SPDX-License-Identifier: MIT

# Version components, kept as strings so they can be joined directly below.
# NOTE(review): two assignments appear here; the later one is the value that
# takes effect when this text is executed top to bottom.
__version_info__ = ("2024", "1", "0")
__version_info__ = ("2024", "2", "0")
# Dotted version string assembled from the components above.
__version__ = ".".join(__version_info__)

0 comments on commit dca8174

Please sign in to comment.