make answers correct in smarteole example (#3)

* add filter_all_test_wtgs_together option to cfg
* improve smarteole_example.py
  - add use_lt_distribution option
  - change pp_analysis.py so AEP calculation is not hard coded
  - minor updates to test_pp_analysis.py because rated power extrapolated bins are handled better
* improve bootstrapping
  - increase runs so that results have converged
  - use tqdm
* Delete check_input_data.py
* Update pp_analysis.py
  - make sure the ref turbine type is used in the reverse analysis if possible
resgroup · Apr 26, 2024 · ece2c49 · ece2c49
1 parent 487afa4
commit ece2c49
Show file tree

Hide file tree

Showing 17 changed files with 384 additions and 273 deletions.
diff --git a/dev-requirements.txt b/dev-requirements.txt
@@ -1,5 +1,5 @@
 #
-# This file is autogenerated by pip-compile with Python 3.12
+# This file is autogenerated by pip-compile with Python 3.11
 # by the following command:
 #
 #    pip-compile --extra=dev --output-file=dev-requirements.txt pyproject.toml
@@ -11,7 +11,9 @@ certifi==2024.2.2
 charset-normalizer==3.3.2
     # via requests
 colorama==0.4.6
-    # via pytest
+    # via
+    #   pytest
+    #   tqdm
 contourpy==1.2.1
     # via matplotlib
 coverage==7.4.4
@@ -101,6 +103,8 @@ toml==0.10.2
     # via wind-up (pyproject.toml)
 tomli==2.0.1
     # via poethepoet
+tqdm==4.66.2
+    # via wind-up (pyproject.toml)
 types-pyyaml==6.0.12.20240311
     # via wind-up (pyproject.toml)
 types-requests==2.31.0.20240406
@@ -109,6 +113,8 @@ types-tabulate==0.9.0.20240106
     # via wind-up (pyproject.toml)
 types-toml==0.10.8.20240310
     # via wind-up (pyproject.toml)
+types-tqdm==4.66.0.20240417
+    # via wind-up (pyproject.toml)
 typing-extensions==4.11.0
     # via
     #   mypy

diff --git a/examples/smarteole_example.py b/examples/smarteole_example.py
@@ -205,6 +205,8 @@ def _unpack_toggle_data() -> pd.DataFrame:
         require_ref_wake_free=True,
         detrend_min_hours=12,
         ref_wd_filter=[197.0, 246.0],
+        filter_all_test_wtgs_together=True,
+        use_lt_distribution=False,
         out_dir=OUTPUT_DIR / "smarteole_example",
         test_wtgs=[wtg_map["SMV6"], wtg_map["SMV5"]],
         ref_wtgs=[wtg_map["SMV7"]],
@@ -254,12 +256,13 @@ def _unpack_toggle_data() -> pd.DataFrame:
             [
                 "test_wtg",
                 "ref",
-                "aep_uplift_frc",
-                "aep_unc_one_sigma_frc",
-                "aep_uplift_p95_frc",
-                "aep_uplift_p5_frc",
-                "pp_hours_pre",
-                "pp_hours_post",
+                "uplift_frc",
+                "unc_one_sigma_frc",
+                "uplift_p95_frc",
+                "uplift_p5_frc",
+                "pp_valid_hours_pre",
+                "pp_valid_hours_post",
+                "mean_power_post",
             ]
         ]
 
@@ -276,14 +279,16 @@ def convert_frc_cols_to_pct(input_df: pd.DataFrame, dp: int = 1) -> pd.DataFrame
             columns={
                 "test_wtg": "turbine",
                 "ref": "reference",
-                "aep_uplift_pct": "energy uplift",
-                "aep_unc_one_sigma_pct": "uplift uncertainty",
-                "aep_uplift_p95_pct": "uplift P95",
-                "aep_uplift_p5_pct": "uplift P5",
-                "pp_hours_pre": "valid hours toggle off",
-                "pp_hours_post": "valid hours toggle on",
+                "uplift_pct": "energy uplift",
+                "unc_one_sigma_pct": "uplift uncertainty",
+                "uplift_p95_pct": "uplift P95",
+                "uplift_p5_pct": "uplift P5",
+                "pp_valid_hours_pre": "valid hours toggle off",
+                "pp_valid_hours_post": "valid hours toggle on",
+                "mean_power_post": "mean power toggle on",
             }
         )
+        print_df["mean power toggle on"] = print_df["mean power toggle on"].round(0).astype("int64")
         results_table = tabulate(
             print_df,
             headers="keys",
@@ -302,10 +307,10 @@ def convert_frc_cols_to_pct(input_df: pd.DataFrame, dp: int = 1) -> pd.DataFrame
                 "reference": ["SMV7", "SMV7"],
                 "energy uplift": ["-1.1%", "3.0%"],
                 "uplift uncertainty": ["0.6%", "1.2%"],
-                "uplift P95": ["-2.1%", "1.0%"],
-                "uplift P5": ["-0.2%", "5.1%"],
-                "valid hours toggle off": [137.8, 137.7],
-                "valid hours toggle on": [136.0, 137.2],
+                "uplift P95": ["-2.1%", "1.1%"],
+                "uplift P5": ["-0.2%", "5.0%"],
+                "valid hours toggle off": [137 + 5 / 6, 137 + 4 / 6],
+                "valid hours toggle on": [136.0, 137 + 1 / 6],
                 "mean power toggle on": [1148, 994],
             },
             index=[0, 1],

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "wind-up"
-version = "0.1.1"
+version = "0.1.2"
 description = ""
 authors = [
     { name = "Alex Clerc", email = "[email protected]" }
@@ -22,6 +22,7 @@ dependencies = [
     'tabulate',
     'toml',
     'utm',
+    'tqdm',
 ]
 
 [project.optional-dependencies]
@@ -33,6 +34,7 @@ dev = [
     'types-tabulate',
     'types-toml',
     'types-requests',
+    'types-tqdm',
     'ruff',
     'mypy',
 ]

diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,5 @@
 #
-# This file is autogenerated by pip-compile with Python 3.12
+# This file is autogenerated by pip-compile with Python 3.11
 # by the following command:
 #
 #    pip-compile --output-file=requirements.txt pyproject.toml
@@ -10,6 +10,8 @@ certifi==2024.2.2
     # via requests
 charset-normalizer==3.3.2
     # via requests
+colorama==0.4.6
+    # via tqdm
 contourpy==1.2.1
     # via matplotlib
 cycler==0.12.1
@@ -77,6 +79,8 @@ tabulate==0.9.0
     # via wind-up (pyproject.toml)
 toml==0.10.2
     # via wind-up (pyproject.toml)
+tqdm==4.66.2
+    # via wind-up (pyproject.toml)
 typing-extensions==4.11.0
     # via
     #   pydantic

diff --git a/tests/test_combine_results.py b/tests/test_combine_results.py
@@ -7,17 +7,17 @@
 
 
 def calc_expected_combine_results(trdf: pd.DataFrame) -> pd.DataFrame:
-    p50_uplift = (trdf["aep_uplift_frc"] * (1 / trdf["aep_unc_one_sigma_frc"] ** 2)).sum() / (
-        1 / trdf["aep_unc_one_sigma_frc"] ** 2
+    p50_uplift = (trdf["uplift_frc"] * (1 / trdf["unc_one_sigma_frc"] ** 2)).sum() / (
+        1 / trdf["unc_one_sigma_frc"] ** 2
     ).sum()
-    sigma_correlated = (1 / trdf["aep_unc_one_sigma_frc"]).sum() / (1 / trdf["aep_unc_one_sigma_frc"] ** 2).sum()
+    sigma_correlated = (1 / trdf["unc_one_sigma_frc"]).sum() / (1 / trdf["unc_one_sigma_frc"] ** 2).sum()
     sigma_independent = (
         (
             (
-                trdf["aep_unc_one_sigma_frc"]
+                trdf["unc_one_sigma_frc"]
                 * 1
-                / (trdf["aep_unc_one_sigma_frc"] ** 2)
-                / (1 / (trdf["aep_unc_one_sigma_frc"] ** 2)).sum()
+                / (trdf["unc_one_sigma_frc"] ** 2)
+                / (1 / (trdf["unc_one_sigma_frc"] ** 2)).sum()
             )
             ** 2
         ).sum()
@@ -27,7 +27,7 @@ def calc_expected_combine_results(trdf: pd.DataFrame) -> pd.DataFrame:
         data={
             "test_wtg": ["test1"],
             "p50_uplift": [p50_uplift],
-            "sigma_aep": [sigma_test],
+            "sigma": [sigma_test],
             "sigma_test": [sigma_test],
             "sigma_uncorr": [sigma_independent],
             "sigma_corr": [sigma_correlated],
@@ -40,8 +40,8 @@ def test_combine_two_refs() -> None:
         data={
             "test_wtg": ["test1", "test1"],
             "ref": ["ref1", "ref2"],
-            "aep_uplift_frc": [0.02, 0.02],
-            "aep_unc_one_sigma_frc": [0.02, 0.02],
+            "uplift_frc": [0.02, 0.02],
+            "unc_one_sigma_frc": [0.02, 0.02],
         },
     )
     edf = calc_expected_combine_results(trdf)
@@ -55,8 +55,8 @@ def test_combine_three_refs() -> None:
         data={
             "test_wtg": ["test1", "test1", "test1"],
             "ref": ["ref1", "ref2", "ref3"],
-            "aep_uplift_frc": [0.01, 0.02, 0.03],
-            "aep_unc_one_sigma_frc": [0.03, 0.02, 0.01],
+            "uplift_frc": [0.01, 0.02, 0.03],
+            "unc_one_sigma_frc": [0.03, 0.02, 0.01],
         },
     )
     edf = calc_expected_combine_results(trdf)

diff --git a/tests/test_data/tdf_BRT_T16_pitch.csv b/tests/test_data/tdf_BRT_T16_pitch.csv
@@ -1,4 +1,4 @@
-,test_wtg,p50_uplift,p95_uplift,p5_uplift,sigma_aep,sigma_uncorr,sigma_corr,ref_count,is_ref,sigma_test
+,test_wtg,p50_uplift,p95_uplift,p5_uplift,sigma,sigma_uncorr,sigma_corr,ref_count,is_ref,sigma_test
 0,BRT_T06,0.015161428714338704,-0.0005284388956903613,0.030851296324367762,0.009538762205308348,0.00725893783201413,0.011818586578602566,3,False,0.009538762205308348
 1,BRT_T07,0.014496219212708562,-0.008476023058972002,0.03746846148438912,0.013966131633400533,0.010792833925243923,0.01713942934155714,3,False,0.013966131633400533
 2,BRT_T16,0.026223226119520177,0.007601990413573723,0.04484446182546663,0.011320907466069518,0.009061430995762848,0.013580383936376188,3,False,0.011320907466069518

diff --git a/tests/test_data/tdf_BRT_T16_pitch_exclude_refs.csv b/tests/test_data/tdf_BRT_T16_pitch_exclude_refs.csv
@@ -1,4 +1,4 @@
-,test_wtg,p50_uplift,p95_uplift,p5_uplift,sigma_aep,sigma_uncorr,sigma_corr,ref_count,is_ref,sigma_test
+,test_wtg,p50_uplift,p95_uplift,p5_uplift,sigma,sigma_uncorr,sigma_corr,ref_count,is_ref,sigma_test
 0,BRT_T06,0.0148287625016823,-0.017305116303851724,0.046962641307216314,0.0195360111556492,0.0195360111556492,0.0195360111556492,1,False,0.0195360111556492
 1,BRT_T07,0.0200972287395023,-0.000665353053845328,0.04085981053284993,0.012622753449392598,0.012622753449392598,0.012622753449392598,1,False,0.012622753449392598
 2,BRT_T16,0.0323018796247317,0.01589252810844277,0.048711231141020626,0.0099761773615696,0.0099761773615696,0.0099761773615696,1,False,0.0099761773615696

diff --git a/tests/test_data/tdf_BRT_T16_pitch_no_auto_choose.csv b/tests/test_data/tdf_BRT_T16_pitch_no_auto_choose.csv
@@ -1,4 +1,4 @@
-,test_wtg,p50_uplift,p95_uplift,p5_uplift,sigma_aep,sigma_uncorr,sigma_corr,ref_count,is_ref,sigma_test
+,test_wtg,p50_uplift,p95_uplift,p5_uplift,sigma,sigma_uncorr,sigma_corr,ref_count,is_ref,sigma_test
 0,BRT_T06,0.017953509257755514,-0.005777477157372521,0.04168449567288354,0.014427415319082466,0.006620270805960113,0.013425652788259275,5,False,0.010022961797109695
 1,BRT_T07,0.010435940388423747,-0.013295046026704288,0.034166926803551774,0.014427415319082466,0.008799055374991732,0.01839991849578958,5,False,0.013599486935390656
 2,BRT_T16,0.025185093620704813,0.0014541072055767774,0.04891608003583284,0.014427415319082466,0.007994298546270586,0.015885872169327,5,False,0.011940085357798794

diff --git a/tests/test_data/trdf_BRT_T16_pitch_Sep23.csv b/tests/test_data/trdf_BRT_T16_pitch_Sep23.csv
@@ -1,4 +1,4 @@
-,time_calculated,test_wtg,ref,aep_uplift_frc,aep_unc_one_sigma_frc,P95_AEP_uplift_frc_final,P5_AEP_uplift_frc_final,data_coverage,distance,unc_one_sigma_frc_CatA,unc_one_sigma_frc_bootstrap,unc_one_sigma_frc_lowerbound,hours_removed_by_exclusions,hours_removed_by_detrend,test_merged_with_ref_hours,test_max_rolling_era5_ws_diff,max_rolling_era5_ws_diff,max_rolling_era5_diff,hours_used_for_LT,hours_upgraded_true,hours_upgraded_false,detrend_count,no_detrend_count,before_detrend_rsq,detrended_rsq,detrend_improvement,AEP_uplift_frc_refbinning,P95_AEP_uplift_frc_refbinning,AEP_uplift,P95_AEP_uplift,AEP_pre,AEP_post,hours_pre_upgrade,hours_post_upgrade,hours_scada_in_ws_bins,hours_scada_loaded,hours_in_upgrade_analysis,mean_test_power_pre,mean_test_power_post,mean_test_ws_est_pre,mean_test_ws_est_post,mean_ref_ws_pre,mean_ref_ws_post,mean_ref_wd_pre,mean_ref_wd_post,invalid_bin_count,invalid_bin_count_below_rated,missing_bins_uncertainty_scale_factor,poweronly_AEP_uplift_frc,reversed_AEP_uplift_frc,reversal_error,AEP_uplift_frc2,AEP_uplift_frc_P95,AEP_uplift_frc_P5
+,time_calculated,test_wtg,ref,uplift_frc,unc_one_sigma_frc,P95_AEP_uplift_frc_final,P5_AEP_uplift_frc_final,data_coverage,distance,unc_one_sigma_frc_CatA,unc_one_sigma_frc_bootstrap,unc_one_sigma_frc_lowerbound,hours_removed_by_exclusions,hours_removed_by_detrend,test_merged_with_ref_hours,test_max_rolling_era5_ws_diff,max_rolling_era5_ws_diff,max_rolling_era5_diff,hours_used_for_LT,hours_upgraded_true,hours_upgraded_false,detrend_count,no_detrend_count,before_detrend_rsq,detrended_rsq,detrend_improvement,AEP_uplift_frc_refbinning,P95_AEP_uplift_frc_refbinning,AEP_uplift,P95_AEP_uplift,AEP_pre,AEP_post,hours_pre_upgrade,hours_post_upgrade,hours_scada_in_ws_bins,hours_scada_loaded,hours_in_upgrade_analysis,mean_test_power_pre,mean_test_power_post,mean_test_ws_est_pre,mean_test_ws_est_post,mean_ref_ws_pre,mean_ref_ws_post,mean_ref_wd_pre,mean_ref_wd_post,invalid_bin_count,invalid_bin_count_below_rated,missing_bins_uncertainty_scale_factor,poweronly_AEP_uplift_frc,reversed_AEP_uplift_frc,reversal_error,AEP_uplift_frc2,AEP_uplift_frc_P95,AEP_uplift_frc_P5
 0,2023-10-03 17:20:35.981104,BRT_T06,BRT_T02,0.015378050186285797,0.01657430123684,-0.011886675348316,0.042642775720887593,0.30960377565580066,438.2584471553979,0.003968528456763835,0.014860464971931548,0.016574297291864274,108711.0,16.166666666666668,14553.0,0.599685897058361,0.7728412655932297,5.899996948242174,14961.833333333332,2469.0,533.0,325,61,0.9240882401141008,0.9599818269527542,0.0358935868386534,-0.001196247105578477,-0.0077244764169549845,-4.821251425668379,-31.13206524313955,4030.3139737478696,4025.492722322201,461.33333333333337,1889.3333333333335,5624.333333333333,7592.5,7592.5,692.6142344647707,336.87139223336163,9.234851024067712,6.439398930855908,9.09451726735649,6.078495660281968,209.98696976749275,231.2991943949236,10,1,1.000000238017676,0.007810673485879383,0.04095926806960793,0.03314859458372855,0.015378050186285797,-0.025641711984405874,0.01614274805293291
 1,2023-10-03 17:23:23.957806,BRT_T06,BRT_T03,0.015167961804999849,0.008867370466598525,0.0005811373874452744,0.029754786222554425,0.31120623422236854,245.81999321885385,0.0031000805087961956,0.004730669915258809,0.008867368356008116,108711.0,8.333333333333334,12772.0,0.599685897058361,0.6221457454648611,5.200000000000006,14961.833333333332,2469.0,533.0,312,86,0.9393120293525672,0.9738516531358765,0.034539623783309326,0.006300593448991732,0.001200961012021991,26.093677803996798,4.973736197481305,4141.463501056794,4167.55717886079,458.5,1904.333333333333,5624.333333333333,7592.5,7592.5,702.544384187382,341.0459218651236,9.115293291044274,6.334497232470266,9.182703774079343,6.122896203611295,211.626777334431,228.69143658478396,10,1,1.000000238017676,0.023510572115332605,0.04124530882734884,0.01773473671201623,0.015167961804999849,-0.0014813585616090083,0.010652271415399874
 2,2023-10-03 17:27:10.166579,BRT_T06,BRT_T04,-0.007719059943730622,0.03902514248779454,-0.07191541933615264,0.0564772994486914,0.15989463286137634,913.6420248787007,0.007176347835940186,0.018137376278193652,0.03902514248779454,108711.0,28.5,15417.666666666666,0.599685897058361,0.8193304711440179,7.000003051757813,14961.833333333332,2469.0,533.0,377,88,0.8930291963891073,0.9261873188839916,0.03315812249488437,-0.04674420243152516,-0.058549294621646764,-196.92770387546443,-246.66113771561422,4212.8797504661825,4015.952046590718,452.0,762.0,5624.333333333333,7592.5,7592.5,700.9402289634961,275.7753721694784,9.242065084136742,5.745098261073838,8.527714724344879,5.2193656616802535,211.21415693035422,244.6348471840468,10,0,1.0,-0.04531981541840625,0.032730469557182826,0.07805028497558908,-0.007719059943730622,-0.07658018640915372,-0.026853184820241658

diff --git a/tests/test_pp_analysis.py b/tests/test_pp_analysis.py
@@ -11,7 +11,7 @@
 def test_pre_post_pp_analysis_with_reversal(test_lsa_t13_config: WindUpConfig) -> None:
     cfg = test_lsa_t13_config
 
-    test_name = "LSA_T13"
+    test_wtg = next(x for x in cfg.asset.wtgs if x.name=="LSA_T13")
     ref_name = "LSA_T12"
     detrend_ws_col = "ref_ws_detrended"
     test_pw_col = "test_pw_clipped"
@@ -24,7 +24,7 @@ def test_pre_post_pp_analysis_with_reversal(test_lsa_t13_config: WindUpConfig) -
     expected_df = pd.read_parquet(Path(__file__).parents[0] / "test_data/pre_post_pp_analysis_expected_df.parquet")
     pp_results, actual_df = pre_post_pp_analysis_with_reversal(
         cfg=cfg,
-        test_name=test_name,
+        test_wtg=test_wtg,
         ref_name=ref_name,
         lt_df=lt_wtg_df_filt,
         pre_df=pre_df,
@@ -36,19 +36,27 @@ def test_pre_post_pp_analysis_with_reversal(test_lsa_t13_config: WindUpConfig) -
         test_df=test_df,
     )
 
+    # minor changes to make actual_df compatible with old expected_df
+    expected_df["hours_for_mwh_calc"] = expected_df["hours_per_year"]
+    expected_df["hours_per_year"] = actual_df["hours_per_year"]
+    cols_with_new_calc = ["uplift_se", "uplift_p5_kw", "uplift_p95_kw"]
+    expected_df[cols_with_new_calc] = actual_df[cols_with_new_calc]
+    new_cols = ["pre_valid", "post_valid", "hours_pre_raw", "hours_post_raw", "is_invalid_bin",'pw_at_mid_expected', 'pw_sem_at_mid_expected']
+    expected_df[new_cols] = actual_df[new_cols]
+    expected_df = expected_df[actual_df.columns]
+
     assert_frame_equal(actual_df, expected_df)
-    assert pp_results["pp_hours"] == pytest.approx(10748.5)
-    assert pp_results["pp_hours_pre"] == pytest.approx(5807.333333333333)
-    assert pp_results["pp_hours_post"] == pytest.approx(4941.166666666667)
+    assert pp_results["pp_valid_hours"] == pytest.approx(10748.5)
+    assert pp_results["pp_valid_hours_pre"] == pytest.approx(5807.333333333333)
+    assert pp_results["pp_valid_hours_post"] == pytest.approx(4941.166666666667)
     assert pp_results["pp_invalid_bin_count"] == 3
     assert pp_results["pp_data_coverage"] == pytest.approx(0.6793388952092024)
-    assert pp_results["reversal_error"] == pytest.approx(-0.008728227022969562)
-    assert pp_results["aep_uplift_noadj_frc"] == pytest.approx(0.04523448345231426)
-    assert pp_results["poweronly_aep_uplift_frc"] == pytest.approx(0.04554579363613362)
-    assert pp_results["reversed_aep_uplift_frc"] == pytest.approx(0.03681756661316406)
-    assert pp_results["aep_uplift_frc"] == pytest.approx(0.04087036994082948)
+    assert pp_results["reversal_error"] == pytest.approx(-0.008786551768533796)
+    assert pp_results["uplift_noadj_frc"] == pytest.approx(0.04523448345231426)
+    assert pp_results["poweronly_uplift_frc"] == pytest.approx(0.04560411838169785)
+    assert pp_results["reversed_uplift_frc"] == pytest.approx(0.03681756661316406)
+    assert pp_results["uplift_frc"] == pytest.approx(0.040841207568047364)
     assert pp_results["missing_bins_unc_scale_factor"] == pytest.approx(1.0000000006930523)
     assert pp_results["t_value_one_sigma"] == pytest.approx(1.0000168636907854)
-    assert pp_results["t_value_conf90"] == pytest.approx(1.6449050204438032)
-    assert pp_results["aep_unc_one_sigma_lowerbound_frc"] == pytest.approx(0.004364113511484781)
-    assert pp_results["aep_unc_one_sigma_frc"] == pytest.approx(0.004364113511484781)
+    assert pp_results["unc_one_sigma_lowerbound_frc"] == pytest.approx(0.004393275884266898)
+    assert pp_results["unc_one_sigma_frc"] == pytest.approx(0.004393275884266898)
diff --git a/wind_up/check_input_data.py b/wind_up/check_input_data.py