Clean up PointSigmaMAD and introduce a method parameter for num_bins.
drewoldag committed Feb 15, 2024
1 parent cfc5449 commit be1c73d
Showing 2 changed files with 38 additions and 121 deletions.
152 changes: 35 additions & 117 deletions src/qp/metrics/parallel_metrics.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -17,7 +17,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -35,7 +35,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -50,7 +50,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -62,7 +62,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -88,7 +88,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -122,54 +122,18 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.844492171486455"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"PointSigmaIQR().evaluate(estimate, reference)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting 4 engines with <class 'ipyparallel.cluster.launcher.MPIEngineSetLauncher'>\n",
"100%|██████████| 4/4 [00:05<00:00, 1.48s/engine]\n",
"mpi_example: 100%|██████████| 10/10 [00:00<00:00, 12.39tasks/s]\n",
"0 : (296, 2)\n",
"1 : (295, 2)\n",
"2 : (298, 2)\n",
"3 : (299, 2)\n",
"4 : (296, 2)\n",
"5 : (297, 2)\n",
"6 : (295, 2)\n",
"7 : (295, 2)\n",
"8 : (294, 2)\n",
"9 : (297, 2)\n",
"1.8458703638788456\n",
"Stopping engine(s): 1708026063\n",
"engine set stopped 1708026063: {'exit_code': 0, 'pid': 40773, 'identifier': 'ipengine-1708026062-ui56-1708026063-40755'}\n",
"Stopping controller\n",
"Controller stopped: {'exit_code': 0, 'pid': 40761, 'identifier': 'ipcontroller-1708026062-ui56-40755'}\n"
]
}
],
"outputs": [],
"source": [
"run_parallel_metric(PointSigmaIQR(), iqr_data_chunks)"
]
@@ -183,54 +147,18 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-0.15921544705180912"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"PointBias().evaluate(estimate, reference)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting 4 engines with <class 'ipyparallel.cluster.launcher.MPIEngineSetLauncher'>\n",
"100%|██████████| 4/4 [00:05<00:00, 1.49s/engine]\n",
"mpi_example: 100%|██████████| 10/10 [00:00<00:00, 12.51tasks/s]\n",
"0 : (296, 2)\n",
"1 : (295, 2)\n",
"2 : (298, 2)\n",
"3 : (299, 2)\n",
"4 : (296, 2)\n",
"5 : (297, 2)\n",
"6 : (295, 2)\n",
"7 : (295, 2)\n",
"8 : (294, 2)\n",
"9 : (297, 2)\n",
"-0.15852842748117044\n",
"Stopping engine(s): 1708026071\n",
"engine set stopped 1708026071: {'exit_code': 0, 'pid': 40822, 'identifier': 'ipengine-1708026070-ddho-1708026071-40755'}\n",
"Stopping controller\n",
"Controller stopped: {'exit_code': 0, 'pid': 40810, 'identifier': 'ipcontroller-1708026070-ddho-40755'}\n"
]
}
],
"outputs": [],
"source": [
"run_parallel_metric(PointBias(), point_bias_data_chunks)"
]
@@ -244,7 +172,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -256,53 +184,43 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.0738614584809976"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"PointSigmaMAD().evaluate(estimate, reference)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This cell allows for adjustment of the `num_bins` parameter.\n",
"\n",
"Larger values trend closer to the analytic result from the cell above."
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.101290495249205"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"psmad = PointSigmaMAD()\n",
"centroid_1 = psmad.accumulate(estimate[0:5000], reference[0:5000])\n",
"centroid_2 = psmad.accumulate(estimate[5000:], reference[5000:])\n",
"psmad.finalize(centroids=[centroid_1, centroid_2])\n"
"centroids = psmad.accumulate(estimate, reference)\n",
"\n",
"#default value for `num_bins` is 1_000_000\n",
"psmad.finalize(centroids=[centroids], num_bins=1_000_000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"source": [
"run_parallel_metric(PointSigmaMAD(), point_sigma_mad_data_chunks)"
]
}
],
"metadata": {
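For reference, the chunked accumulate/finalize pattern that the notebook cells above exercise is sketched below. The import path is assumed from the module edited in this commit, and the estimate/reference arrays are synthetic stand-ins for the notebook's data; the accumulate, finalize, and num_bins calls themselves mirror the cells shown in the diff.

import numpy as np
from qp.metrics.point_estimate_metric_classes import PointSigmaMAD  # assumed import path

rng = np.random.default_rng(0)
reference = rng.uniform(0.1, 2.0, size=10_000)               # stand-in "true" point values
estimate = reference + rng.normal(scale=0.05, size=10_000)   # stand-in point estimates

psmad = PointSigmaMAD()

# Accumulate per-chunk centroids (two arbitrary chunks here), then finalize once,
# passing the num_bins argument introduced by this commit.
chunk_centroids = [
    psmad.accumulate(estimate[:5_000], reference[:5_000]),
    psmad.accumulate(estimate[5_000:], reference[5_000:]),
]
print(psmad.finalize(centroids=chunk_centroids, num_bins=1_000_000))
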
7 changes: 3 additions & 4 deletions src/qp/metrics/point_estimate_metric_classes.py
@@ -219,7 +219,7 @@ def accumulate(self, estimate, reference):
centroids = digest.get_centroids()
return centroids

def finalize(self, centroids=None):
def finalize(self, centroids=None, num_bins=1_000_000):
digests = (
TDigest.of_centroids(np.array(centroid), compression=1000)
for centroid in centroids
@@ -229,11 +229,10 @@ def finalize(self, centroids=None):
SCALE_FACTOR = 1.4826

# calculation of `np.median(np.fabs(ez - np.median(ez)))` as suggested by Eric Charles
this_median = digest.inverse_cdf([0.50])[0]
lots_of_bins = 100000
this_median = digest.inverse_cdf(0.50)
this_min = digest.inverse_cdf(0)
this_max = digest.inverse_cdf(1)
bins = np.linspace(this_min, this_max, lots_of_bins)
bins = np.linspace(this_min, this_max, num_bins)
this_pdf = digest.cdf(bins[1:]) - digest.cdf(bins[0:-1]) # len(this_pdf) = lots_of_bins - 1
bin_dist = np.fabs(bins - this_median) # get the distance to the center for each bin in the hist

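The quantity finalize() approximates is the direct calculation np.median(np.fabs(ez - np.median(ez))) scaled by 1.4826, as noted in the comment above. A self-contained NumPy sketch in the spirit of that binned approximation (using a plain histogram in place of the t-digest, so it makes no assumptions about the qp or TDigest APIs, and reconstructing the tail of the calculation that the diff truncates) illustrates why larger num_bins values trend toward the analytic result:

import numpy as np

SCALE_FACTOR = 1.4826  # converts a MAD into an equivalent Gaussian sigma

rng = np.random.default_rng(42)
ez = rng.normal(scale=0.05, size=100_000)  # stand-in for estimate - reference residuals

# Analytic value, computed directly on the residuals.
direct = SCALE_FACTOR * np.median(np.fabs(ez - np.median(ez)))

def binned_sigma_mad(values, num_bins=1_000_000):
    """Approximate sigma_MAD from a binned distribution over the full data range."""
    this_median = np.median(values)
    bins = np.linspace(values.min(), values.max(), num_bins)
    pdf, _ = np.histogram(values, bins=bins)
    pdf = pdf / pdf.sum()
    centers = 0.5 * (bins[1:] + bins[:-1])
    bin_dist = np.fabs(centers - this_median)      # distance of each bin from the median
    order = np.argsort(bin_dist)
    cumulative = np.cumsum(pdf[order])
    mad = bin_dist[order][np.searchsorted(cumulative, 0.5)]  # deviation enclosing half the mass
    return SCALE_FACTOR * mad

# Finer binning converges toward the direct calculation.
print(direct, binned_sigma_mad(ez, num_bins=10_000), binned_sigma_mad(ez, num_bins=1_000_000))
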
