Skip to content

Commit

Permalink
docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
(DHSC) Annabel Westermann authored and (DHSC) Annabel Westermann committed May 17, 2024
1 parent d1ac6df commit c4eca75
Show file tree
Hide file tree
Showing 16 changed files with 69 additions and 1,337 deletions.
8 changes: 2 additions & 6 deletions PHStatsMethods/DSR.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,8 @@ def ph_dsr(df, num_col, denom_col, ref_denom_col, group_cols = None, metadata =
**kwargs:
ref_df: DataFrame of reference data to join.
ref_join_left: A string or list of column name(s) in `df` to join on to.
ref_join_right: A string or list of column name(s) in `ref_df` to join on to.
ref_join_left (str | list): A string or list of column name(s) in `df` to join on to.
ref_join_right (str | list): A string or list of column name(s) in `ref_df` to join on to.
Returns:
DataFrame of calculated rates and confidence intervals
Expand All @@ -56,8 +54,6 @@ def ph_dsr(df, num_col, denom_col, ref_denom_col, group_cols = None, metadata =
(2) Dobson A et al. Confidence intervals for weighted sums of Poisson parameters. Stat Med 1991;10:457-62.
"""

df = df.copy().reset_index(drop=True)

if not isinstance(multiplier, int) or multiplier <= 0:
raise ValueError("'Multiplier' must be a positive integer")
Expand Down
17 changes: 10 additions & 7 deletions PHStatsMethods/ISRate.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,18 @@ def ph_ISRate(df, num_col, denom_col, ref_num_col, ref_denom_col, group_cols = N
**kwargs:
ref_df
ref_join_left
ref_join_right
obs_df
obs_join_left
obs_join_right
ref_df: DataFrame of reference data to join.
ref_join_left (str | list): A string or list of column name(s) in `df` to join on to.
ref_join_right (str | list): A string or list of column name(s) in `ref_df` to join on to.
obs_df: DataFrame of total observed events for each group.
obs_join_left (str | list): A string or list of column name(s) in `df` to join on to.
obs_join_right (str | list): A string or list of column name(s) in `obs_df` to join on to.
Returns:
df: Dataframe containing calculated IS Rates.
"""

df = df.copy()
confidence, group_cols = format_args(confidence, group_cols)
ref_df, ref_join_left, ref_join_right = check_kwargs(df, kwargs, 'ref', ref_num_col, ref_denom_col)
obs_df, obs_join_left, obs_join_right = check_kwargs(df, kwargs, 'obs', num_col)
Expand Down
12 changes: 6 additions & 6 deletions PHStatsMethods/ISRatio.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ def ph_ISRatio(df, num_col, denom_col, ref_num_col, ref_denom_col, group_cols =
refvalue (int): the standardised reference ratio, default = 1
**kwargs:
ref_df
ref_join_left
ref_join_right
obs_df
obs_join_left
obs_join_right
ref_df: DataFrame of reference data to join.
ref_join_left (str | list): A string or list of column name(s) in `df` to join on to.
ref_join_right (str | list): A string or list of column name(s) in `ref_df` to join on to.
obs_df: DataFrame of total observed events for each group.
obs_join_left (str | list): A string or list of column name(s) in `df` to join on to.
obs_join_right (str | list): A string or list of column name(s) in `obs_df` to join on to.
Returns:
df: Dataframe containing calculated IS Ratios.
Expand Down
55 changes: 47 additions & 8 deletions PHStatsMethods/funnels.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,25 @@


def calculate_funnel_limits(df, num_col, statistic, multiplier, denom_col = None, metadata = True,
rate = None, ratio_type = None, rate_type = None, years_of_data = None):
rate = None, rate_type = None, ratio_type = None, years_of_data = None):
"""Calculates control limits adopting a consistent method as per the Fingertips Technical Guidance
Args:
df: DataFrame containing the data to calculate control limits for.
num_col (str): Name of column containing observed number of cases in the sample
(the numerator of the population).
statistic (str): type of statistic to inform funnel calculations: 'proportion', 'rate', or 'ratio'
multiplier (int): multiplier used to express the final values (e.g. 100 = percentage)
denom_col (str): Name of column containing number of cases in sample
(the denominator of the population).
group_cols (str | list): A string or list of column name(s) to group the data by.
Defaults to None.
metadata (bool): Whether to include information on the statistic and confidence interval methods.
confidence (float): Confidence interval(s) to use, either as a float, list of float values or None.
Confidence intervals must be between 0.9 and 1. Defaults to 0.95 (2 std from mean).
multiplier (int): multiplier used to express the final values (e.g. 100 = percentage)
rate (str): column name containing the 'rate'.
rate_type (str): if statistic is 'rate', specify either 'dsr' or 'crude'.
ratio_type (str): if statistic is 'ratio', specify either 'count' or 'isr' (indirectly standardised ratio).
years_of_data (int): number of years the data represents; this is required if statistic is 'ratio'
Returns:
DataFrame of calculated proportion statistics with confidence intervals (df).
DataFrame of calculated confidence limits.
"""

Expand Down Expand Up @@ -170,7 +171,26 @@ def calculate_funnel_limits(df, num_col, statistic, multiplier, denom_col = None



def assign_funnel_significance(df, num_col, denom_col, statistic, rate = None, rate_type = None, multiplier = None):
def assign_funnel_significance(df, num_col, statistic, denom_col = None, rate = None, rate_type = None, multiplier = None):
"""Identifies whether each value in a dataset falls outside of 95 and/or 99.8 percent control limits based on the
aggregated average value across the whole dataset as an indicator of statistically significant difference.
Args:
df: DataFrame containing the data to calculate control limits for.
num_col (str): Name of column containing observed number of cases in the sample
(the numerator of the population).
statistic (str): type of statistic to inform funnel calculations: 'proportion', 'rate', or 'ratio'
denom_col (str): Name of column containing number of cases in sample
(the denominator of the population).
rate (str): column name containing the 'rate'.
rate_type (str): if statistic is 'rate', specify either 'dsr' or 'crude'.
multiplier (int): multiplier the rate is normalised with (e.g. per 100000); only required when statistic is 'rate'.
Returns:
DataFrame of calculated significance levels.
"""

if statistic not in ['rate', 'proportion', 'ratio']:
raise ValueError("'statistic' must be either 'proportion', 'ratio' or 'rate")
Expand Down Expand Up @@ -250,6 +270,25 @@ def assign_funnel_significance(df, num_col, denom_col, statistic, rate = None, r

def calculate_funnel_points(df, num_col, rate, rate_type, denom_col = None,
multiplier = 100000, years_of_data = 1):
"""For rate-based funnels: Derive rate and annual population values for charting based. Process removes rates where the
rate type is 'dsr' and the number of observed events is below 10.
Args:
df: DataFrame containing the data to calculate control limits for.
num_col (str): Name of column containing observed number of cases in the sample
(the numerator of the population).
rate (str): column name containing the 'rate'.
rate_type (str): type of rate, either 'dsr' or 'crude'.
denom_col (str): Name of column containing number of cases in sample
(the denominator of the population).
years_of_data (int): number of years the data represents. Defaults to 1.
multiplier (int): multiplier the rate is normalised with (e.g. per 100000). Defaults to 100000.
Returns:
DataFrame of calculated funnel points, with two columns: the first has the same name as the
rate field with the suffix '_chart'; the second is called 'denominator_derived'.
"""

df = validate_data(df, num_col, denom_col = denom_col)

Expand Down
3 changes: 0 additions & 3 deletions PHStatsMethods/proportions.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,6 @@ def ph_proportion(df, num_col, denom_col, group_cols = None, metadata = True, co
"""

# Ensure original df remains unchanged
df = df.copy()

# Check data and arguments
confidence, group_cols = format_args(confidence, group_cols)
df = validate_data(df, num_col, group_cols, metadata, denom_col)
Expand Down
Loading

0 comments on commit c4eca75

Please sign in to comment.