Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into vega-datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
dangotbanned committed Jan 18, 2025
2 parents ba01af1 + a79b934 commit 80647b6
Show file tree
Hide file tree
Showing 6 changed files with 220 additions and 3 deletions.
10 changes: 10 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ doc = [
"sphinx_copybutton",
"sphinx-design",
"scipy",
"scipy-stubs; python_version>=\"3.10\"",
]

[tool.altair.vega]
Expand Down Expand Up @@ -325,8 +326,17 @@ module = [
"ipykernel.*",
"ibis.*",
"vegafusion.*",
"scipy.*"
]
ignore_missing_imports = true
disable_error_code = ["import-untyped"]

[[tool.mypy.overrides]]
module = [
"tests/examples_arguments_syntax.*",
"tests/examples_methods_syntax.*",
]
disable_error_code = ["annotation-unchecked"]

[tool.pyright]
enableExperimentalFeatures=true
Expand Down
6 changes: 3 additions & 3 deletions sphinxext/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def create_generic_image(
"""


def _parse_source_file(filename: str) -> tuple[ast.Module | None, str]:
def _parse_source_file(filename: str | Path) -> tuple[ast.Module | None, str]:
"""
Parse source file into AST node.
Expand Down Expand Up @@ -88,7 +88,7 @@ def _parse_source_file(filename: str) -> tuple[ast.Module | None, str]:
return node, content


def get_docstring_and_rest(filename: str) -> tuple[str, str | None, str, int]:
def get_docstring_and_rest(filename: str | Path) -> tuple[str, str | None, str, int]:
"""
Separate ``filename`` content between docstring and the rest.
Expand Down Expand Up @@ -160,7 +160,7 @@ def get_docstring_and_rest(filename: str) -> tuple[str, str | None, str, int]:
if (
node.body
and isinstance(node.body[0], ast.Expr)
and isinstance(node.body[0].value, (ast.Str, ast.Constant))
and isinstance(node.body[0].value, ast.Constant)
):
docstring_node = node.body[0]
docstring = docstring_node.value.s # pyright: ignore[reportAttributeAccessIssue]
Expand Down
5 changes: 5 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ def windows_has_tzdata() -> bool:
https://github.com/vega/vegafusion
"""

# Marker that skips the decorated test when ``find_spec`` cannot locate ``scipy``.
skip_requires_scipy: pytest.MarkDecorator = pytest.mark.skipif(
find_spec("scipy") is None, reason="`scipy` not installed."
)


@overload
def skip_requires_pyarrow(
Expand Down Expand Up @@ -236,6 +240,7 @@ def _distributed_examples(
"wind_vector_map": slow,
r"\.point_map\.py": slow,
"line_chart_with_color_datum": slow,
"deviation_ellipses": skip_requires_scipy,
},
)
),
Expand Down
88 changes: 88 additions & 0 deletions tests/examples_arguments_syntax/deviation_ellipses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""
Confidence Interval Ellipses
----------------------------
This example shows bivariate deviation ellipses of petal length and width of three iris species.

Inspired by `ggplot2.stat_ellipse`_ and directly based on `Deviation ellipses example`_ by `@essicolo`_

.. _ggplot2.stat_ellipse:
    https://ggplot2.tidyverse.org/reference/stat_ellipse.html#ref-examples
.. _Deviation ellipses example:
    https://github.com/vega/altair/pull/514
.. _@essicolo:
    https://github.com/essicolo
"""

# category: case studies
import numpy as np
import pandas as pd
from scipy.stats import f as F

import altair as alt
from vega_datasets import data


def confidence_region_2d(arr, conf_level=0.95, segments=50):
    """
    Compute the outline of a confidence ellipse for 2D observations.

    Parameters
    ----------
    arr
        Observations with 2 columns, one row per point.
    conf_level
        Probability at which the F-distribution percent point function
        is evaluated.
    segments
        Number of points returned along the ellipse outline.

    Returns
    -------
    Array with ``segments`` rows and 2 columns holding the outline,
    centred on the column means of ``arr``.
    """
    # Unit circle sampled at ``segments`` evenly spaced angles, starting at 0.
    theta = np.arange(0, segments) * 2 * np.pi / segments
    unit_circle = np.column_stack((np.cos(theta), np.sin(theta)))

    # Radius from the F distribution: 2 degrees of freedom in the numerator,
    # ``len(arr) - 1`` in the denominator.
    numerator_dof = 2
    denominator_dof = len(arr) - 1
    f_quantile = F.ppf(conf_level, dfn=numerator_dof, dfd=denominator_dof)
    scale = np.sqrt(2 * f_quantile)

    # The transposed Cholesky factor of the sample covariance turns the
    # circle into an ellipse; it is then scaled and shifted to the mean.
    midpoint = np.mean(arr, axis=0)
    covariance = np.cov(arr, rowvar=False)
    shape = np.linalg.cholesky(covariance).T
    return midpoint + scale * (unit_circle @ shape)


def grouped_confidence_regions(df, col_x, col_y, col_group):
    """
    Build one confidence ellipse per group and stack them in one frame.

    Parameters
    ----------
    df
        DataFrame holding the observations.
    col_x, col_y
        Names of the two columns each ellipse is fitted to.
    col_group
        Name of the column whose distinct values define the groups.

    Returns
    -------
    DataFrame with the ellipse outlines of every group. The ``order``
    column holds each point's position within its own ellipse.
    """
    xy_cols = [col_x, col_y]
    labels: pd.Series[float] = df[col_group]

    def _outline_for(label):
        # Ellipse outline for the rows belonging to ``label``, tagged with it.
        observations = df.loc[labels == label, xy_cols].to_numpy()
        outline = pd.DataFrame(confidence_region_2d(observations), columns=xy_cols)
        outline[col_group] = label
        return outline

    outlines = [_outline_for(label) for label in labels.drop_duplicates()]
    # The per-ellipse row index becomes the ``order`` column.
    return pd.concat(outlines).reset_index(names="order")


# Column labels of ``source`` used for the x axis, the y axis and the grouping.
col_x = "petalLength"
col_y = "petalWidth"
col_group = "species"

# Encoding channels shared by both layers; the x/y scales are created with zero=False.
x = alt.X(col_x, scale=alt.Scale(zero=False))
y = alt.Y(col_y, scale=alt.Scale(zero=False))
color = alt.Color(col_group)

source = data.iris()
# One ellipse outline per distinct value of ``col_group``, stacked in a single frame.
ellipse = grouped_confidence_regions(source, col_x=col_x, col_y=col_y, col_group=col_group)
# Layer with the raw observations.
points = alt.Chart(source).mark_circle(size=50, tooltip=True).encode(
x=x,
y=y,
color=color
)
# Layer with the ellipses; "order" is the column that
# ``grouped_confidence_regions`` adds via ``reset_index(names="order")``.
lines = alt.Chart(ellipse).mark_line(filled=True, fillOpacity=0.2).encode(
x=x,
y=y,
color=color,
order="order"
)

# Combine both layers, ellipses listed first, into one 500x500 chart.
chart = (lines + points).properties(height=500, width=500)
# Bare expression; presumably lets notebook/gallery tooling display the chart -- confirm.
chart
88 changes: 88 additions & 0 deletions tests/examples_methods_syntax/deviation_ellipses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""
Confidence Interval Ellipses
----------------------------
This example shows bivariate deviation ellipses of petal length and width of three iris species.

Inspired by `ggplot2.stat_ellipse`_ and directly based on `Deviation ellipses example`_ by `@essicolo`_

.. _ggplot2.stat_ellipse:
    https://ggplot2.tidyverse.org/reference/stat_ellipse.html#ref-examples
.. _Deviation ellipses example:
    https://github.com/vega/altair/pull/514
.. _@essicolo:
    https://github.com/essicolo
"""

# category: case studies
import numpy as np
import pandas as pd
from scipy.stats import f as F

import altair as alt
from vega_datasets import data


def confidence_region_2d(arr, conf_level=0.95, segments=50):
    """
    Compute the outline of a confidence ellipse for 2D observations.

    Parameters
    ----------
    arr
        Observations with 2 columns, one row per point.
    conf_level
        Probability at which the F-distribution percent point function
        is evaluated.
    segments
        Number of points returned along the ellipse outline.

    Returns
    -------
    Array with ``segments`` rows and 2 columns holding the outline,
    centred on the column means of ``arr``.
    """
    # Unit circle sampled at ``segments`` evenly spaced angles, starting at 0.
    theta = np.arange(0, segments) * 2 * np.pi / segments
    unit_circle = np.column_stack((np.cos(theta), np.sin(theta)))

    # Radius from the F distribution: 2 degrees of freedom in the numerator,
    # ``len(arr) - 1`` in the denominator.
    numerator_dof = 2
    denominator_dof = len(arr) - 1
    f_quantile = F.ppf(conf_level, dfn=numerator_dof, dfd=denominator_dof)
    scale = np.sqrt(2 * f_quantile)

    # The transposed Cholesky factor of the sample covariance turns the
    # circle into an ellipse; it is then scaled and shifted to the mean.
    midpoint = np.mean(arr, axis=0)
    covariance = np.cov(arr, rowvar=False)
    shape = np.linalg.cholesky(covariance).T
    return midpoint + scale * (unit_circle @ shape)


def grouped_confidence_regions(df, col_x, col_y, col_group):
    """
    Build one confidence ellipse per group and stack them in one frame.

    Parameters
    ----------
    df
        DataFrame holding the observations.
    col_x, col_y
        Names of the two columns each ellipse is fitted to.
    col_group
        Name of the column whose distinct values define the groups.

    Returns
    -------
    DataFrame with the ellipse outlines of every group. The ``order``
    column holds each point's position within its own ellipse.
    """
    xy_cols = [col_x, col_y]
    labels: pd.Series[float] = df[col_group]

    def _outline_for(label):
        # Ellipse outline for the rows belonging to ``label``, tagged with it.
        observations = df.loc[labels == label, xy_cols].to_numpy()
        outline = pd.DataFrame(confidence_region_2d(observations), columns=xy_cols)
        outline[col_group] = label
        return outline

    outlines = [_outline_for(label) for label in labels.drop_duplicates()]
    # The per-ellipse row index becomes the ``order`` column.
    return pd.concat(outlines).reset_index(names="order")


# Column labels of ``source`` used for the x axis, the y axis and the grouping.
col_x = "petalLength"
col_y = "petalWidth"
col_group = "species"

# Encoding channels shared by both layers; the x/y scales are set with ``.scale(zero=False)``.
x = alt.X(col_x).scale(zero=False)
y = alt.Y(col_y).scale(zero=False)
color = alt.Color(col_group)

source = data.iris()
# One ellipse outline per distinct value of ``col_group``, stacked in a single frame.
ellipse = grouped_confidence_regions(source, col_x=col_x, col_y=col_y, col_group=col_group)
# Layer with the raw observations.
points = alt.Chart(source).mark_circle(size=50, tooltip=True).encode(
x=x,
y=y,
color=color
)
# Layer with the ellipses; "order" is the column that
# ``grouped_confidence_regions`` adds via ``reset_index(names="order")``.
lines = alt.Chart(ellipse).mark_line(filled=True, fillOpacity=0.2).encode(
x=x,
y=y,
color=color,
order="order"
)

# Combine both layers, ellipses listed first, into one 500x500 chart.
chart = (lines + points).properties(height=500, width=500)
# Bare expression; presumably lets notebook/gallery tooling display the chart -- confirm.
chart
26 changes: 26 additions & 0 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 80647b6

Please sign in to comment.